From a440943e68cd1b5a853a6f60865967b7cc2539eb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 15 Sep 2021 08:59:58 +0200
Subject: unicode: remove the charset field from struct unicode_map

It is hardcoded and only used for a f2fs sysfs file where it can be
hardcoded just as easily.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
---
 include/linux/unicode.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 74484d44c755..6a392cd9f076 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -6,7 +6,6 @@
 #include <linux/dcache.h>
 
 struct unicode_map {
-	const char *charset;
 	int version;
 };
 
-- 
cgit v1.2.3


From f3a9c82396006a5664f6e398d6928799d29de76e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 15 Sep 2021 08:59:59 +0200
Subject: unicode: mark the version field in struct unicode_map unsigned

unicode version tripplets are always unsigned.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
---
 include/linux/unicode.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 6a392cd9f076..0744f81c4b5f 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -6,7 +6,7 @@
 #include <linux/dcache.h>
 
 struct unicode_map {
-	int version;
+	unsigned int version;
 };
 
 int utf8_validate(const struct unicode_map *um, const struct qstr *str);
-- 
cgit v1.2.3


From 49bd03cc7e95cb78420305ca2f5ef67497b6fa80 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 15 Sep 2021 09:00:00 +0200
Subject: unicode: pass a UNICODE_AGE() tripple to utf8_load

Don't bother with pointless string parsing when the caller can just pass
the version in the format that the core expects.  Also remove the
fallback to the latest version that none of the callers actually uses.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
---
 include/linux/unicode.h | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 0744f81c4b5f..77bb915fd1f0 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -5,6 +5,29 @@
 #include <linux/init.h>
 #include <linux/dcache.h>
 
+#define UNICODE_MAJ_SHIFT		16
+#define UNICODE_MIN_SHIFT		8
+
+#define UNICODE_AGE(MAJ, MIN, REV)			\
+	(((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) |	\
+	 ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) |	\
+	 ((unsigned int)(REV)))
+
+static inline u8 unicode_major(unsigned int age)
+{
+	return (age >> UNICODE_MAJ_SHIFT) & 0xff;
+}
+
+static inline u8 unicode_minor(unsigned int age)
+{
+	return (age >> UNICODE_MIN_SHIFT) & 0xff;
+}
+
+static inline u8 unicode_rev(unsigned int age)
+{
+	return age & 0xff;
+}
+
 struct unicode_map {
 	unsigned int version;
 };
@@ -29,7 +52,7 @@ int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
 int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
 		       struct qstr *str);
 
-struct unicode_map *utf8_load(const char *version);
+struct unicode_map *utf8_load(unsigned int version);
 void utf8_unload(struct unicode_map *um);
 
 #endif /* _LINUX_UNICODE_H */
-- 
cgit v1.2.3


From 6ca99ce756c27852d1ea1e555045de1c920f30ed Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 15 Sep 2021 09:00:04 +0200
Subject: unicode: cache the normalization tables in struct unicode_map

Instead of repeatedly looking up the version add pointers to the
NFD and NFD+CF tables to struct unicode_map, and pass a
unicode_map plus index to the functions using the normalization
tables.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
---
 include/linux/unicode.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 77bb915fd1f0..526ca8b8391a 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -5,6 +5,8 @@
 #include <linux/init.h>
 #include <linux/dcache.h>
 
+struct utf8data;
+
 #define UNICODE_MAJ_SHIFT		16
 #define UNICODE_MIN_SHIFT		8
 
@@ -28,8 +30,25 @@ static inline u8 unicode_rev(unsigned int age)
 	return age & 0xff;
 }
 
+/*
+ * Two normalization forms are supported:
+ * 1) NFDI
+ *   - Apply unicode normalization form NFD.
+ *   - Remove any Default_Ignorable_Code_Point.
+ * 2) NFDICF
+ *   - Apply unicode normalization form NFD.
+ *   - Remove any Default_Ignorable_Code_Point.
+ *   - Apply a full casefold (C + F).
+ */
+enum utf8_normalization {
+	UTF8_NFDI = 0,
+	UTF8_NFDICF,
+	UTF8_NMAX,
+};
+
 struct unicode_map {
 	unsigned int version;
+	const struct utf8data *ntab[UTF8_NMAX];
 };
 
 int utf8_validate(const struct unicode_map *um, const struct qstr *str);
-- 
cgit v1.2.3


From 2b3d047870120bcd46d7cc257d19ff49328fd585 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 15 Sep 2021 09:00:05 +0200
Subject: unicode: Add utf8-data module

utf8data.h contains a large database table which is an auto-generated
decodification trie for the unicode normalization functions.

Allow building it into a separate module.

Based on a patch from Shreeya Patel <shreeya.patel@collabora.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
---
 include/linux/unicode.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 526ca8b8391a..4d39e6e11a95 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -6,6 +6,7 @@
 #include <linux/dcache.h>
 
 struct utf8data;
+struct utf8data_table;
 
 #define UNICODE_MAJ_SHIFT		16
 #define UNICODE_MIN_SHIFT		8
@@ -49,6 +50,7 @@ enum utf8_normalization {
 struct unicode_map {
 	unsigned int version;
 	const struct utf8data *ntab[UTF8_NMAX];
+	const struct utf8data_table *tables;
 };
 
 int utf8_validate(const struct unicode_map *um, const struct qstr *str);
-- 
cgit v1.2.3


From 107fe904302092c683cf5462b4af3cb3cfa40998 Mon Sep 17 00:00:00 2001
From: Rajat Jain <rajatja@google.com>
Date: Tue, 5 Oct 2021 22:23:13 +0200
Subject: drm/connector: Add support for privacy-screen properties (v4)

Add support for generic electronic privacy screen properties, that
can be added by systems that have an integrated EPS.

Changes in v2 (Hans de Goede)
- Create 2 properties, "privacy-screen sw-state" and
  "privacy-screen hw-state", to deal with devices where the OS might be
  locked out of making state changes
- Write kerneldoc explaining how the 2 properties work together, what
  happens when changes to the state are made outside of the DRM code's
  control, etc.

Changes in v3 (Hans de Goede)
- Some small tweaks to the kerneldoc describing the 2 properties

Changes in v4 (Hans de Goede)
- Change the "Enabled, locked" and "Disabled, locked" hw-state enum value
  names to "Enabled-locked" and "Disabled-locked". The xrandr command shows
  all possible enum values separated by commas in its output, so having a
  comma in an enum name is not a good idea.
- Do not add a privacy_screen_hw_state member to drm_connector_state
  since this property is immutable its value must be directly stored in the
  obj->properties->values array

Signed-off-by: Rajat Jain <rajatja@google.com>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Reviewed-by: Mario Limonciello <Mario.limonciello@dell.com>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Co-developed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211005202322.700909-2-hdegoede@redhat.com
---
 include/drm/drm_connector.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index 79fa34e5ccdb..1acbcf0626ce 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -320,6 +320,30 @@ struct drm_monitor_range_info {
 	u8 max_vfreq;
 };
 
+/**
+ * enum drm_privacy_screen_status - privacy screen status
+ *
+ * This enum is used to track and control the state of the integrated privacy
+ * screen present on some display panels, via the "privacy-screen sw-state"
+ * and "privacy-screen hw-state" properties. Note the _LOCKED enum values
+ * are only valid for the "privacy-screen hw-state" property.
+ *
+ * @PRIVACY_SCREEN_DISABLED:
+ *  The privacy-screen on the panel is disabled
+ * @PRIVACY_SCREEN_ENABLED:
+ *  The privacy-screen on the panel is enabled
+ * @PRIVACY_SCREEN_DISABLED_LOCKED:
+ *  The privacy-screen on the panel is disabled and locked (cannot be changed)
+ * @PRIVACY_SCREEN_ENABLED_LOCKED:
+ *  The privacy-screen on the panel is enabled and locked (cannot be changed)
+ */
+enum drm_privacy_screen_status {
+	PRIVACY_SCREEN_DISABLED = 0,
+	PRIVACY_SCREEN_ENABLED,
+	PRIVACY_SCREEN_DISABLED_LOCKED,
+	PRIVACY_SCREEN_ENABLED_LOCKED,
+};
+
 /*
  * This is a consolidated colorimetry list supported by HDMI and
  * DP protocol standard. The respective connectors will register
@@ -781,6 +805,12 @@ struct drm_connector_state {
 	 */
 	u8 max_bpc;
 
+	/**
+	 * @privacy_screen_sw_state: See :ref:`Standard Connector
+	 * Properties<standard_connector_properties>`
+	 */
+	enum drm_privacy_screen_status privacy_screen_sw_state;
+
 	/**
 	 * @hdr_output_metadata:
 	 * DRM blob property for HDR output metadata
@@ -1409,6 +1439,18 @@ struct drm_connector {
 	 */
 	struct drm_property *max_bpc_property;
 
+	/**
+	 * @privacy_screen_sw_state_property: Optional atomic property for the
+	 * connector to control the integrated privacy screen.
+	 */
+	struct drm_property *privacy_screen_sw_state_property;
+
+	/**
+	 * @privacy_screen_hw_state_property: Optional atomic property for the
+	 * connector to report the actual integrated privacy screen state.
+	 */
+	struct drm_property *privacy_screen_hw_state_property;
+
 #define DRM_CONNECTOR_POLL_HPD (1 << 0)
 #define DRM_CONNECTOR_POLL_CONNECT (1 << 1)
 #define DRM_CONNECTOR_POLL_DISCONNECT (1 << 2)
@@ -1732,6 +1774,8 @@ int drm_connector_set_panel_orientation_with_quirk(
 	int width, int height);
 int drm_connector_attach_max_bpc_property(struct drm_connector *connector,
 					  int min, int max);
+void drm_connector_create_privacy_screen_properties(struct drm_connector *conn);
+void drm_connector_attach_privacy_screen_properties(struct drm_connector *conn);
 
 /**
  * struct drm_tile_group - Tile group metadata
-- 
cgit v1.2.3


From a1a98689301b9af0313e4c1ba44558e8b67ff76e Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 5 Oct 2021 22:23:14 +0200
Subject: drm: Add privacy-screen class (v4)

On some new laptops the LCD panel has a builtin electronic privacy-screen.
We want to export this functionality as a property on the drm connector
object. But often this functionality is not exposed on the GPU but on some
other (ACPI) device.

This commit adds a privacy-screen class allowing the driver for these
other devices to register themselves as a privacy-screen provider; and
allowing the drm/kms code to get a privacy-screen provider associated
with a specific GPU/connector combo.

Changes in v2:
- Make CONFIG_DRM_PRIVACY_SCREEN a bool which controls if the drm_privacy
  code gets built as part of the main drm module rather then making it
  a tristate which builds its own module.
- Add a #if IS_ENABLED(CONFIG_DRM_PRIVACY_SCREEN) check to
  drm_privacy_screen_consumer.h and define stubs when the check fails.
  Together these 2 changes fix several dependency issues.
- Remove module related code now that this is part of the main drm.ko
- Use drm_class as class for the privacy-screen devices instead of
  adding a separate class for this

Changes in v3:
- Make the static inline drm_privacy_screen_get_state() stub set sw_state
  and hw_state to PRIVACY_SCREEN_DISABLED to squelch an uninitialized
  variable warning when CONFIG_DRM_PRIVICAY_SCREEN is not set

Changes in v4:
- Make drm_privacy_screen_set_sw_state() skip calling out to the hw if
  hw_state == new_sw_state

Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211005202322.700909-3-hdegoede@redhat.com
---
 include/drm/drm_privacy_screen_consumer.h | 50 +++++++++++++++++++
 include/drm/drm_privacy_screen_driver.h   | 80 +++++++++++++++++++++++++++++++
 include/drm/drm_privacy_screen_machine.h  | 41 ++++++++++++++++
 3 files changed, 171 insertions(+)
 create mode 100644 include/drm/drm_privacy_screen_consumer.h
 create mode 100644 include/drm/drm_privacy_screen_driver.h
 create mode 100644 include/drm/drm_privacy_screen_machine.h

(limited to 'include')

diff --git a/include/drm/drm_privacy_screen_consumer.h b/include/drm/drm_privacy_screen_consumer.h
new file mode 100644
index 000000000000..0cbd23b0453d
--- /dev/null
+++ b/include/drm/drm_privacy_screen_consumer.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2020 Red Hat, Inc.
+ *
+ * Authors:
+ * Hans de Goede <hdegoede@redhat.com>
+ */
+
+#ifndef __DRM_PRIVACY_SCREEN_CONSUMER_H__
+#define __DRM_PRIVACY_SCREEN_CONSUMER_H__
+
+#include <linux/device.h>
+#include <drm/drm_connector.h>
+
+struct drm_privacy_screen;
+
+#if IS_ENABLED(CONFIG_DRM_PRIVACY_SCREEN)
+struct drm_privacy_screen *drm_privacy_screen_get(struct device *dev,
+						  const char *con_id);
+void drm_privacy_screen_put(struct drm_privacy_screen *priv);
+
+int drm_privacy_screen_set_sw_state(struct drm_privacy_screen *priv,
+				    enum drm_privacy_screen_status sw_state);
+void drm_privacy_screen_get_state(struct drm_privacy_screen *priv,
+				  enum drm_privacy_screen_status *sw_state_ret,
+				  enum drm_privacy_screen_status *hw_state_ret);
+#else
+static inline struct drm_privacy_screen *drm_privacy_screen_get(struct device *dev,
+								const char *con_id)
+{
+	return ERR_PTR(-ENODEV);
+}
+static inline void drm_privacy_screen_put(struct drm_privacy_screen *priv)
+{
+}
+static inline int drm_privacy_screen_set_sw_state(struct drm_privacy_screen *priv,
+						  enum drm_privacy_screen_status sw_state)
+{
+	return -ENODEV;
+}
+static inline void drm_privacy_screen_get_state(struct drm_privacy_screen *priv,
+						enum drm_privacy_screen_status *sw_state_ret,
+						enum drm_privacy_screen_status *hw_state_ret)
+{
+	*sw_state_ret = PRIVACY_SCREEN_DISABLED;
+	*hw_state_ret = PRIVACY_SCREEN_DISABLED;
+}
+#endif
+
+#endif
diff --git a/include/drm/drm_privacy_screen_driver.h b/include/drm/drm_privacy_screen_driver.h
new file mode 100644
index 000000000000..5187ae52eb03
--- /dev/null
+++ b/include/drm/drm_privacy_screen_driver.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2020 Red Hat, Inc.
+ *
+ * Authors:
+ * Hans de Goede <hdegoede@redhat.com>
+ */
+
+#ifndef __DRM_PRIVACY_SCREEN_DRIVER_H__
+#define __DRM_PRIVACY_SCREEN_DRIVER_H__
+
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <drm/drm_connector.h>
+
+struct drm_privacy_screen;
+
+/**
+ * struct drm_privacy_screen_ops - drm_privacy_screen operations
+ *
+ * Defines the operations which the privacy-screen class code may call.
+ * These functions should be implemented by the privacy-screen driver.
+ */
+struct drm_privacy_screen_ops {
+	/**
+	 * @set_sw_state: Called to request a change of the privacy-screen
+	 * state. The privacy-screen class code contains a check to avoid this
+	 * getting called when the hw_state reports the state is locked.
+	 * It is the driver's responsibility to update sw_state and hw_state.
+	 * This is always called with the drm_privacy_screen's lock held.
+	 */
+	int (*set_sw_state)(struct drm_privacy_screen *priv,
+			    enum drm_privacy_screen_status sw_state);
+	/**
+	 * @get_hw_state: Called to request that the driver gets the current
+	 * privacy-screen state from the hardware and then updates sw_state and
+	 * hw_state accordingly. This will be called by the core just before
+	 * the privacy-screen is registered in sysfs.
+	 */
+	void (*get_hw_state)(struct drm_privacy_screen *priv);
+};
+
+/**
+ * struct drm_privacy_screen - central privacy-screen structure
+ *
+ * Central privacy-screen structure, this contains the struct device used
+ * to register the screen in sysfs, the screen's state, ops, etc.
+ */
+struct drm_privacy_screen {
+	/** @dev: device used to register the privacy-screen in sysfs. */
+	struct device dev;
+	/** @lock: mutex protection all fields in this struct. */
+	struct mutex lock;
+	/** @list: privacy-screen devices list list-entry. */
+	struct list_head list;
+	/**
+	 * @ops: &struct drm_privacy_screen_ops for this privacy-screen.
+	 * This is NULL if the driver has unregistered the privacy-screen.
+	 */
+	const struct drm_privacy_screen_ops *ops;
+	/**
+	 * @sw_state: The privacy-screen's software state, see
+	 * :ref:`Standard Connector Properties<standard_connector_properties>`
+	 * for more info.
+	 */
+	enum drm_privacy_screen_status sw_state;
+	/**
+	 * @hw_state: The privacy-screen's hardware state, see
+	 * :ref:`Standard Connector Properties<standard_connector_properties>`
+	 * for more info.
+	 */
+	enum drm_privacy_screen_status hw_state;
+};
+
+struct drm_privacy_screen *drm_privacy_screen_register(
+	struct device *parent, const struct drm_privacy_screen_ops *ops);
+void drm_privacy_screen_unregister(struct drm_privacy_screen *priv);
+
+#endif
diff --git a/include/drm/drm_privacy_screen_machine.h b/include/drm/drm_privacy_screen_machine.h
new file mode 100644
index 000000000000..aaa0d38cce92
--- /dev/null
+++ b/include/drm/drm_privacy_screen_machine.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2020 Red Hat, Inc.
+ *
+ * Authors:
+ * Hans de Goede <hdegoede@redhat.com>
+ */
+
+#ifndef __DRM_PRIVACY_SCREEN_MACHINE_H__
+#define __DRM_PRIVACY_SCREEN_MACHINE_H__
+
+#include <linux/list.h>
+
+/**
+ * struct drm_privacy_screen_lookup -  static privacy-screen lookup list entry
+ *
+ * Used for the static lookup-list for mapping privacy-screen consumer
+ * dev-connector pairs to a privacy-screen provider.
+ */
+struct drm_privacy_screen_lookup {
+	/** @list: Lookup list list-entry. */
+	struct list_head list;
+	/** @dev_id: Consumer device name or NULL to match all devices. */
+	const char *dev_id;
+	/** @con_id: Consumer connector name or NULL to match all connectors. */
+	const char *con_id;
+	/** @provider: dev_name() of the privacy_screen provider. */
+	const char *provider;
+};
+
+void drm_privacy_screen_lookup_add(struct drm_privacy_screen_lookup *lookup);
+void drm_privacy_screen_lookup_remove(struct drm_privacy_screen_lookup *lookup);
+
+static inline void drm_privacy_screen_lookup_init(void)
+{
+}
+static inline void drm_privacy_screen_lookup_exit(void)
+{
+}
+
+#endif
-- 
cgit v1.2.3


From befe5404a00b3b1547c944738df4a9229909bdc9 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 5 Oct 2021 22:23:15 +0200
Subject: drm/privacy-screen: Add X86 specific arch init code

Add X86 specific arch init code, which fills the privacy-screen lookup
table by checking for various vendor specific ACPI interfaces for
controlling the privacy-screen.

This initial version only checks for the Lenovo Thinkpad specific ACPI
methods for privacy-screen control.

Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211005202322.700909-4-hdegoede@redhat.com
---
 include/drm/drm_privacy_screen_machine.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_privacy_screen_machine.h b/include/drm/drm_privacy_screen_machine.h
index aaa0d38cce92..02e5371904d3 100644
--- a/include/drm/drm_privacy_screen_machine.h
+++ b/include/drm/drm_privacy_screen_machine.h
@@ -31,11 +31,16 @@ struct drm_privacy_screen_lookup {
 void drm_privacy_screen_lookup_add(struct drm_privacy_screen_lookup *lookup);
 void drm_privacy_screen_lookup_remove(struct drm_privacy_screen_lookup *lookup);
 
+#if IS_ENABLED(CONFIG_DRM_PRIVACY_SCREEN) && IS_ENABLED(CONFIG_X86)
+void drm_privacy_screen_lookup_init(void);
+void drm_privacy_screen_lookup_exit(void);
+#else
 static inline void drm_privacy_screen_lookup_init(void)
 {
 }
 static inline void drm_privacy_screen_lookup_exit(void)
 {
 }
+#endif
 
 #endif
-- 
cgit v1.2.3


From 8a12b170558aabb31cc98fda0da6a56b518cadaa Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 5 Oct 2021 22:23:16 +0200
Subject: drm/privacy-screen: Add notifier support (v2)

Add support for privacy-screen consumers to register a notifier to
be notified of external (e.g. done by the hw itself on a hotkey press)
state changes.

Changes in v2:
- Drop WARN_ON(mutex_is_locked(&priv->lock)) check in
  drm_privacy_screen_call_notifier_chain() it may be locked by
  another thread, which would lead to a false-positive triggering
  of the check

Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211005202322.700909-5-hdegoede@redhat.com
---
 include/drm/drm_privacy_screen_consumer.h | 15 +++++++++++++++
 include/drm/drm_privacy_screen_driver.h   |  4 ++++
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_privacy_screen_consumer.h b/include/drm/drm_privacy_screen_consumer.h
index 0cbd23b0453d..7f66a90d15b7 100644
--- a/include/drm/drm_privacy_screen_consumer.h
+++ b/include/drm/drm_privacy_screen_consumer.h
@@ -24,6 +24,11 @@ int drm_privacy_screen_set_sw_state(struct drm_privacy_screen *priv,
 void drm_privacy_screen_get_state(struct drm_privacy_screen *priv,
 				  enum drm_privacy_screen_status *sw_state_ret,
 				  enum drm_privacy_screen_status *hw_state_ret);
+
+int drm_privacy_screen_register_notifier(struct drm_privacy_screen *priv,
+					 struct notifier_block *nb);
+int drm_privacy_screen_unregister_notifier(struct drm_privacy_screen *priv,
+					   struct notifier_block *nb);
 #else
 static inline struct drm_privacy_screen *drm_privacy_screen_get(struct device *dev,
 								const char *con_id)
@@ -45,6 +50,16 @@ static inline void drm_privacy_screen_get_state(struct drm_privacy_screen *priv,
 	*sw_state_ret = PRIVACY_SCREEN_DISABLED;
 	*hw_state_ret = PRIVACY_SCREEN_DISABLED;
 }
+static inline int drm_privacy_screen_register_notifier(struct drm_privacy_screen *priv,
+						       struct notifier_block *nb)
+{
+	return -ENODEV;
+}
+static inline int drm_privacy_screen_unregister_notifier(struct drm_privacy_screen *priv,
+							 struct notifier_block *nb)
+{
+	return -ENODEV;
+}
 #endif
 
 #endif
diff --git a/include/drm/drm_privacy_screen_driver.h b/include/drm/drm_privacy_screen_driver.h
index 5187ae52eb03..24591b607675 100644
--- a/include/drm/drm_privacy_screen_driver.h
+++ b/include/drm/drm_privacy_screen_driver.h
@@ -54,6 +54,8 @@ struct drm_privacy_screen {
 	struct mutex lock;
 	/** @list: privacy-screen devices list list-entry. */
 	struct list_head list;
+	/** @notifier_head: privacy-screen notifier head. */
+	struct blocking_notifier_head notifier_head;
 	/**
 	 * @ops: &struct drm_privacy_screen_ops for this privacy-screen.
 	 * This is NULL if the driver has unregistered the privacy-screen.
@@ -77,4 +79,6 @@ struct drm_privacy_screen *drm_privacy_screen_register(
 	struct device *parent, const struct drm_privacy_screen_ops *ops);
 void drm_privacy_screen_unregister(struct drm_privacy_screen *priv);
 
+void drm_privacy_screen_call_notifier_chain(struct drm_privacy_screen *priv);
+
 #endif
-- 
cgit v1.2.3


From 334f74ee85dc26a50c1a2b0da82517595191f92f Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 5 Oct 2021 22:23:17 +0200
Subject: drm/connector: Add a drm_connector privacy-screen helper functions
 (v2)

Add 2 drm_connector privacy-screen helper functions:

1. drm_connector_attach_privacy_screen_provider(), this function creates
and attaches the standard privacy-screen properties and registers a
generic notifier for generating sysfs-connector-status-events on external
changes to the privacy-screen status.

2. drm_connector_update_privacy_screen(), update the privacy-screen's
sw_state if the connector has a privacy-screen.

Changes in v2:
- Do not update connector->state->privacy_screen_sw_state on
  atomic-commits.
- Change drm_connector_update_privacy_screen() to take drm_connector_state
  as argument instead of a full drm_atomic_state. This allows the helper
  to be called by drivers when they are enabling crtcs/encoders/connectors.

Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211005202322.700909-6-hdegoede@redhat.com
---
 include/drm/drm_connector.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index 1acbcf0626ce..b83ba0285f29 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -27,6 +27,7 @@
 #include <linux/llist.h>
 #include <linux/ctype.h>
 #include <linux/hdmi.h>
+#include <linux/notifier.h>
 #include <drm/drm_mode_object.h>
 #include <drm/drm_util.h>
 
@@ -40,6 +41,7 @@ struct drm_encoder;
 struct drm_property;
 struct drm_property_blob;
 struct drm_printer;
+struct drm_privacy_screen;
 struct edid;
 struct i2c_adapter;
 
@@ -1439,6 +1441,12 @@ struct drm_connector {
 	 */
 	struct drm_property *max_bpc_property;
 
+	/** @privacy_screen: drm_privacy_screen for this connector, or NULL. */
+	struct drm_privacy_screen *privacy_screen;
+
+	/** @privacy_screen_notifier: privacy-screen notifier_block */
+	struct notifier_block privacy_screen_notifier;
+
 	/**
 	 * @privacy_screen_sw_state_property: Optional atomic property for the
 	 * connector to control the integrated privacy screen.
@@ -1776,6 +1784,9 @@ int drm_connector_attach_max_bpc_property(struct drm_connector *connector,
 					  int min, int max);
 void drm_connector_create_privacy_screen_properties(struct drm_connector *conn);
 void drm_connector_attach_privacy_screen_properties(struct drm_connector *conn);
+void drm_connector_attach_privacy_screen_provider(
+	struct drm_connector *connector, struct drm_privacy_screen *priv);
+void drm_connector_update_privacy_screen(const struct drm_connector_state *connector_state);
 
 /**
  * struct drm_tile_group - Tile group metadata
-- 
cgit v1.2.3


From 7c4dd0a266527ffa7ed8d424facaba171618820a Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Wed, 13 Oct 2021 00:42:52 +0200
Subject: drm: of: Add drm_of_lvds_get_data_mapping

Add helper function to convert DT "data-mapping" property string value
into media bus format value, and deduplicate the code in panel-lvds.c
and lvds-codec.c .

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
To: dri-devel@lists.freedesktop.org
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20211012224252.29185-1-marex@denx.de
---
 include/drm/drm_of.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_of.h b/include/drm/drm_of.h
index b9b093add92e..99f79ac8b4cd 100644
--- a/include/drm/drm_of.h
+++ b/include/drm/drm_of.h
@@ -49,6 +49,7 @@ int drm_of_find_panel_or_bridge(const struct device_node *np,
 				struct drm_bridge **bridge);
 int drm_of_lvds_get_dual_link_pixel_order(const struct device_node *port1,
 					  const struct device_node *port2);
+int drm_of_lvds_get_data_mapping(const struct device_node *port);
 #else
 static inline uint32_t drm_of_crtc_port_mask(struct drm_device *dev,
 					  struct device_node *port)
@@ -98,6 +99,12 @@ drm_of_lvds_get_dual_link_pixel_order(const struct device_node *port1,
 {
 	return -EINVAL;
 }
+
+static inline int
+drm_of_lvds_get_data_mapping(const struct device_node *port)
+{
+	return -EINVAL;
+}
 #endif
 
 /*
-- 
cgit v1.2.3


From ba3078dad1401131293664733c15f6f066079de9 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Thu, 14 Oct 2021 18:00:57 +0300
Subject: drm/dp: add helpers to read link training delays
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The link training delays are different and/or available in different
DPCD offsets depending on:

- Clock recovery vs. channel equalization
- DPRX vs. LTTPR
- 128b/132b vs. 8b/10b
- DPCD 1.4+ vs. earlier

Add helpers to get the correct delays in us, reading DPCD if
necessary. This is more straightforward than trying to retrofit the
existing helpers to take 128b/132b into account.

Having to pass in the DPCD receiver cap field seems unavoidable, because
reading it involves checking the revision and reading extended receiver
cap. So unfortunately the interface is mixed cached and read as needed.

v2: Remove delay_us < 0 check and the whole local var (Ville)

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211014150059.28957-1-jani.nikula@intel.com
---
 include/drm/drm_dp_helper.h | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index a1df35aa6e68..b653c5da7065 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -1114,8 +1114,15 @@ struct drm_panel;
 # define DP_UHBR20                             (1 << 1)
 # define DP_UHBR13_5                           (1 << 2)
 
-#define DP_128B132B_TRAINING_AUX_RD_INTERVAL   0x2216 /* 2.0 */
-# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_MASK 0x7f
+#define DP_128B132B_TRAINING_AUX_RD_INTERVAL                    0x2216 /* 2.0 */
+# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_MASK              0x7f
+# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_400_US            0x00
+# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_4_MS              0x01
+# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_8_MS              0x02
+# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_12_MS             0x03
+# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_16_MS             0x04
+# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_32_MS             0x05
+# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_64_MS             0x06
 
 #define DP_TEST_264BIT_CUSTOM_PATTERN_7_0		0x2230
 #define DP_TEST_264BIT_CUSTOM_PATTERN_263_256	0x2250
@@ -1385,6 +1392,11 @@ enum drm_dp_phy {
 # define DP_VOLTAGE_SWING_LEVEL_3_SUPPORTED		    BIT(0)
 # define DP_PRE_EMPHASIS_LEVEL_3_SUPPORTED		    BIT(1)
 
+#define DP_128B132B_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1  0xf0022 /* 2.0 */
+#define DP_128B132B_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER(dp_phy)	\
+	DP_LTTPR_REG(dp_phy, DP_128B132B_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1)
+/* see DP_128B132B_TRAINING_AUX_RD_INTERVAL for values */
+
 #define DP_LANE0_1_STATUS_PHY_REPEATER1			    0xf0030 /* 1.3 */
 #define DP_LANE0_1_STATUS_PHY_REPEATER(dp_phy) \
 	DP_LTTPR_REG(dp_phy, DP_LANE0_1_STATUS_PHY_REPEATER1)
@@ -1521,6 +1533,11 @@ u8 drm_dp_get_adjust_request_post_cursor(const u8 link_status[DP_LINK_STATUS_SIZ
 #define DP_LTTPR_COMMON_CAP_SIZE	8
 #define DP_LTTPR_PHY_CAP_SIZE		3
 
+int drm_dp_read_clock_recovery_delay(struct drm_dp_aux *aux, const u8 dpcd[DP_RECEIVER_CAP_SIZE],
+				     enum drm_dp_phy dp_phy, bool uhbr);
+int drm_dp_read_channel_eq_delay(struct drm_dp_aux *aux, const u8 dpcd[DP_RECEIVER_CAP_SIZE],
+				 enum drm_dp_phy dp_phy, bool uhbr);
+
 void drm_dp_link_train_clock_recovery_delay(const struct drm_dp_aux *aux,
 					    const u8 dpcd[DP_RECEIVER_CAP_SIZE]);
 void drm_dp_lttpr_link_train_clock_recovery_delay(void);
-- 
cgit v1.2.3


From dd66f56caea6bb1a3703fb3bfc3106444d05a930 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Thu, 21 Oct 2021 08:55:24 +0200
Subject: dma-buf: fix kerneldoc for renamed members
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Those members where renamed, update the kerneldoc as well.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Sumit Semwal <sumit.semwal@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20211021141945.84023-1-christian.koenig@amd.com
---
 include/linux/dma-buf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 02c2eb874da6..9807aef33685 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -433,8 +433,8 @@ struct dma_buf {
 	/** @poll: for userspace poll support */
 	wait_queue_head_t poll;
 
-	/** @cb_excl: for userspace poll support */
-	/** @cb_shared: for userspace poll support */
+	/** @cb_in: for userspace poll support */
+	/** @cb_out: for userspace poll support */
 	struct dma_buf_poll_cb_t {
 		struct dma_fence_cb cb;
 		wait_queue_head_t *poll;
-- 
cgit v1.2.3


From 1705643faecde95bdeb11bea5ab5baed084e9f91 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Tue, 19 Oct 2021 05:30:20 +0800
Subject: mmc: add MT7921 SDIO identifiers for MediaTek Bluetooth devices

The MT7961 SDIO identifier for MediaTek Bluetooth devices were being
referred in the MediaTek Bluetooth driver.

Co-developed-by: Mark-yw Chen <mark-yw.chen@mediatek.com>
Signed-off-by: Mark-yw Chen <mark-yw.chen@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/linux/mmc/sdio_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index a85c9f0bd470..53f0efa0bccf 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -105,6 +105,7 @@
 #define SDIO_VENDOR_ID_MEDIATEK			0x037a
 #define SDIO_DEVICE_ID_MEDIATEK_MT7663		0x7663
 #define SDIO_DEVICE_ID_MEDIATEK_MT7668		0x7668
+#define SDIO_DEVICE_ID_MEDIATEK_MT7961		0x7961
 
 #define SDIO_VENDOR_ID_MICROCHIP_WILC		0x0296
 #define SDIO_DEVICE_ID_MICROCHIP_WILC1000	0x5347
-- 
cgit v1.2.3


From 3ab7b6ac5d829e60c3b89d415811ff1c9f358c8e Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Mon, 25 Oct 2021 10:09:23 -0700
Subject: pwm: Introduce single-PWM of_xlate function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The existing pxa driver and the upcoming addition of PWM support in the
TI sn565dsi86 DSI/eDP bridge driver both has a single PWM channel and
thereby a need for a of_xlate function with the period as its single
argument.

Introduce a common helper function in the core that can be used as
of_xlate by such drivers and migrate the pxa driver to use this.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Tested-by: Steev Klimaszewski <steev@kali.org>
Tested-By: Steev Klimaszewski <steev@kali.org>
Signed-off-by: Robert Foss <robert.foss@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20211025170925.3096444-1-bjorn.andersson@linaro.org
---
 include/linux/pwm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 725c9b784e60..dd51d4931fdc 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -414,6 +414,8 @@ struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
 
 struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc,
 		const struct of_phandle_args *args);
+struct pwm_device *of_pwm_single_xlate(struct pwm_chip *pc,
+				       const struct of_phandle_args *args);
 
 struct pwm_device *pwm_get(struct device *dev, const char *con_id);
 struct pwm_device *of_pwm_get(struct device *dev, struct device_node *np,
-- 
cgit v1.2.3


From 6a98e3836fa2077b169f10a35c2ca9952d53f987 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 27 Oct 2021 16:58:38 -0700
Subject: Bluetooth: Add helper for serialized HCI command execution

The usage of __hci_cmd_sync() within the hdev->setup() callback allows for
a nice and simple serialized execution of HCI commands. More importantly
it allows for result processing before issueing the next command.

With the current usage of hci_req_run() it is possible to batch up
commands and execute them, but it is impossible to react to their
results or errors.

This is an attempt to generalize the hdev->setup() handling and provide
a simple way of running multiple HCI commands from a single function
context.

There are multiple struct work that are decdicated to certain tasks
already used right now. It is add a lot of bloat to hci_dev struct and
extra handling code. So it might be possible to put all of these behind
a common HCI command infrastructure and just execute the HCI commands
from the same work context in a serialized fashion.

For example updating the white list and resolving list can be done now
without having to know the list size ahead of time. Also preparing for
suspend or resume shouldn't require a state machine anymore. There are
other tasks that should be simplified as well.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 11 ++++-------
 include/net/bluetooth/hci_sync.h | 42 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 7 deletions(-)
 create mode 100644 include/net/bluetooth/hci_sync.h

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index dd8840e70e25..17ddea51d161 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -30,6 +30,7 @@
 #include <linux/rculist.h>
 
 #include <net/bluetooth/hci.h>
+#include <net/bluetooth/hci_sync.h>
 #include <net/bluetooth/hci_sock.h>
 
 /* HCI priority */
@@ -475,6 +476,9 @@ struct hci_dev {
 	struct work_struct	power_on;
 	struct delayed_work	power_off;
 	struct work_struct	error_reset;
+	struct work_struct	cmd_sync_work;
+	struct list_head	cmd_sync_work_list;
+	struct mutex		cmd_sync_work_lock;
 
 	__u16			discov_timeout;
 	struct delayed_work	discov_off;
@@ -1690,10 +1694,6 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency,
 int hci_register_cb(struct hci_cb *hcb);
 int hci_unregister_cb(struct hci_cb *hcb);
 
-struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
-			       const void *param, u32 timeout);
-struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen,
-				  const void *param, u8 event, u32 timeout);
 int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen,
 		   const void *param);
 
@@ -1704,9 +1704,6 @@ void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb);
 
 void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode);
 
-struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
-			     const void *param, u32 timeout);
-
 u32 hci_conn_get_phy(struct hci_conn *conn);
 
 /* ----- HCI Sockets ----- */
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
new file mode 100644
index 000000000000..fcfdeb3cbd7c
--- /dev/null
+++ b/include/net/bluetooth/hci_sync.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * BlueZ - Bluetooth protocol stack for Linux
+ *
+ * Copyright (C) 2021 Intel Corporation
+ */
+
+typedef int (*hci_cmd_sync_work_func_t)(struct hci_dev *hdev, void *data);
+typedef void (*hci_cmd_sync_work_destroy_t)(struct hci_dev *hdev, void *data,
+					    int err);
+
+struct hci_cmd_sync_work_entry {
+	struct list_head list;
+	hci_cmd_sync_work_func_t func;
+	void *data;
+	hci_cmd_sync_work_destroy_t destroy;
+};
+
+/* Function with sync suffix shall not be called with hdev->lock held as they
+ * wait the command to complete and in the meantime an event could be received
+ * which could attempt to acquire hdev->lock causing a deadlock.
+ */
+struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
+			       const void *param, u32 timeout);
+struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
+			     const void *param, u32 timeout);
+struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen,
+				  const void *param, u8 event, u32 timeout);
+struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
+				  const void *param, u8 event, u32 timeout,
+				  struct sock *sk);
+int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen,
+			  const void *param, u32 timeout);
+int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
+			     const void *param, u8 event, u32 timeout,
+			     struct sock *sk);
+
+void hci_cmd_sync_init(struct hci_dev *hdev);
+void hci_cmd_sync_clear(struct hci_dev *hdev);
+
+int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
+		       void *data, hci_cmd_sync_work_destroy_t destroy);
-- 
cgit v1.2.3


From 161510ccf91c961638940b03abb1ee804be53a97 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 27 Oct 2021 16:58:39 -0700
Subject: Bluetooth: hci_sync: Make use of hci_cmd_sync_queue set 1

This make use of hci_cmd_sync_queue for the following MGMT commands:

Set Device Class
Set Device ID
Add UUID
Remove UUID

tools/mgmt-tester -s "Set Device Class"

Test Summary
------------
Set Device Class - Success 1                         Passed
Set Device Class - Success 2                         Passed
Set Device Class - Invalid parameters 1              Passed
Total: 3, Passed: 3 (100.0%), Failed: 0, Not Run: 0
Overall execution time: 0.0599 seconds

tools/mgmt-tester -s "Set Device ID"

Test Summary
------------
Set Device ID - Success 1                            Passed
Set Device ID - Success 2                            Passed
Set Device ID - Disable                              Passed
Set Device ID - Power off and Power on               Passed
Set Device ID - SSP off and Power on                 Passed
Set Device ID - Invalid Parameter                    Passed
Total: 6, Passed: 6 (100.0%), Failed: 0, Not Run: 0
Overall execution time: 0.107 seconds

tools/mgmt-tester -s "Add UUID"

Test Summary
------------
Add UUID - UUID-16 1                                 Passed
Add UUID - UUID-16 multiple 1                        Passed
Add UUID - UUID-16 partial 1                         Passed
Add UUID - UUID-32 1                                 Passed
Add UUID - UUID-32 multiple 1                        Passed
Add UUID - UUID-32 partial 1                         Passed
Add UUID - UUID-128 1                                Passed
Add UUID - UUID-128 multiple 1                       Passed
Add UUID - UUID-128 partial 1                        Passed
Add UUID - UUID mix                                  Passed
Total: 10, Passed: 10 (100.0%), Failed: 0, Not Run: 0
Overall execution time: 0.198 seconds

tools/mgmt-tester -s "Remove UUID"

Test Summary
------------
Remove UUID - Success 1                              Passed
Remove UUID - All UUID - Success 2                   Passed
Remove UUID - Power Off - Success 3                  Passed
Remove UUID - Power Off and On - Success 4           Passed
Remove UUID - Not Exist - Invalid Params 1           Passed
Total: 5, Passed: 5 (100.0%), Failed: 0, Not Run: 0
Overall execution time: 0.0908 seconds

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_sync.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index fcfdeb3cbd7c..fe77ff97bc8e 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -40,3 +40,6 @@ void hci_cmd_sync_clear(struct hci_dev *hdev);
 
 int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
 		       void *data, hci_cmd_sync_work_destroy_t destroy);
+
+int hci_update_eir_sync(struct hci_dev *hdev);
+int hci_update_class_sync(struct hci_dev *hdev);
-- 
cgit v1.2.3


From cba6b758711cab946c787f7c15be92cc749b8e1f Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 27 Oct 2021 16:58:40 -0700
Subject: Bluetooth: hci_sync: Make use of hci_cmd_sync_queue set 2

This make use of hci_cmd_sync_queue for the following MGMT commands:

Add Advertising
Remove Advertising
Add Extended Advertising Parameters
Add Extended Advertising Data

mgmt-tester -s "Add Advertising"

Test Summary
------------
Add Advertising - Failure: LE off                    Passed
Add Advertising - Invalid Params 1 (AD too long)     Passed
Add Advertising - Invalid Params 2 (Malformed len)   Passed
Add Advertising - Invalid Params 3 (Malformed len)   Passed
Add Advertising - Invalid Params 4 (Malformed len)   Passed
Add Advertising - Invalid Params 5 (AD too long)     Passed
Add Advertising - Invalid Params 6 (ScRsp too long)  Passed
Add Advertising - Invalid Params 7 (Malformed len)   Passed
Add Advertising - Invalid Params 8 (Malformed len)   Passed
Add Advertising - Invalid Params 9 (Malformed len)   Passed
Add Advertising - Invalid Params 10 (ScRsp too long) Passed
Add Advertising - Rejected (Timeout, !Powered)       Passed
Add Advertising - Success 1 (Powered, Add Adv Inst)  Passed
Add Advertising - Success 2 (!Powered, Add Adv Inst) Passed
Add Advertising - Success 3 (!Powered, Adv Enable)   Passed
Add Advertising - Success 4 (Set Adv on override)    Passed
Add Advertising - Success 5 (Set Adv off override)   Passed
Add Advertising - Success 6 (Scan Rsp Dta, Adv ok)   Passed
Add Advertising - Success 7 (Scan Rsp Dta, Scan ok)  Passed
Add Advertising - Success 8 (Connectable Flag)       Passed
Add Advertising - Success 9 (General Discov Flag)    Passed
Add Advertising - Success 10 (Limited Discov Flag)   Passed
Add Advertising - Success 11 (Managed Flags)         Passed
Add Advertising - Success 12 (TX Power Flag)         Passed
Add Advertising - Success 13 (ADV_SCAN_IND)          Passed
Add Advertising - Success 14 (ADV_NONCONN_IND)       Passed
Add Advertising - Success 15 (ADV_IND)               Passed
Add Advertising - Success 16 (Connectable -> on)     Passed
Add Advertising - Success 17 (Connectable -> off)    Passed
Add Advertising - Success 18 (Power -> off, Remove)  Passed
Add Advertising - Success 19 (Power -> off, Keep)    Passed
Add Advertising - Success 20 (Add Adv override)      Passed
Add Advertising - Success 21 (Timeout expires)       Passed
Add Advertising - Success 22 (LE -> off, Remove)     Passed
Add Advertising - Success (Empty ScRsp)              Passed
Add Advertising - Success (ScRsp only)               Passed
Add Advertising - Invalid Params (ScRsp too long)    Passed
Add Advertising - Success (ScRsp appear)             Passed
Add Advertising - Invalid Params (ScRsp appear long) Passed
Add Advertising - Success (Appear is null)           Passed
Add Advertising - Success (Name is null)             Passed
Add Advertising - Success (Complete name)            Passed
Add Advertising - Success (Shortened name)           Passed
Add Advertising - Success (Short name)               Passed
Add Advertising - Success (Name + data)              Passed
Add Advertising - Invalid Params (Name + data)       Passed
Add Advertising - Success (Name+data+appear)         Passed
Total: 47, Passed: 47 (100.0%), Failed: 0, Not Run: 0
Overall execution time: 2.17 seconds

mgmt-tester -s "Remove Advertising"

Test Summary
------------
Remove Advertising - Invalid Params 1                Passed
Remove Advertising - Success 1                       Passed
Remove Advertising - Success 2                       Passed
Total: 3, Passed: 3 (100.0%), Failed: 0, Not Run: 0
Overall execution time: 0.0585 seconds

mgmt-tester -s "Ext Adv MGMT Params"

Test Summary:
------------
Ext Adv MGMT Params - Unpowered                      Passed
Ext Adv MGMT Params - Invalid parameters             Passed
Ext Adv MGMT Params - Success                        Passed
Ext Adv MGMT Params - (5.0) Success                  Passed
Total: 4, Passed: 4 (100.0%), Failed: 0, Not Run: 0
Overall execution time: 0.0746 seconds

mgmt-tester -s "Ext Adv MGMT -"

Test Summary
------------
Ext Adv MGMT - Data set without Params               Passed
Ext Adv MGMT - AD Data (5.0) Invalid parameters      Passed
Ext Adv MGMT - AD Data (5.0) Success                 Passed
Ext Adv MGMT - AD Scan Response (5.0) Success        Passed
Total: 4, Passed: 4 (100.0%), Failed: 0, Not Run: 0
Overall execution time: 0.0805 seconds

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h |  2 ++
 include/net/bluetooth/hci_sync.h  | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 3271870fd85e..2f31e571f34c 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -380,6 +380,7 @@ typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status,
 #define HCI_REQ_SKB	BIT(1)
 
 struct hci_ctrl {
+	struct sock *sk;
 	u16 opcode;
 	u8 req_flags;
 	u8 req_event;
@@ -405,6 +406,7 @@ struct bt_skb_cb {
 #define hci_skb_pkt_type(skb) bt_cb((skb))->pkt_type
 #define hci_skb_expect(skb) bt_cb((skb))->expect
 #define hci_skb_opcode(skb) bt_cb((skb))->hci.opcode
+#define hci_skb_sk(skb) bt_cb((skb))->hci.sk
 
 static inline struct sk_buff *bt_skb_alloc(unsigned int len, gfp_t how)
 {
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index fe77ff97bc8e..143829cd1b29 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -43,3 +43,23 @@ int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
 
 int hci_update_eir_sync(struct hci_dev *hdev);
 int hci_update_class_sync(struct hci_dev *hdev);
+
+int hci_update_eir_sync(struct hci_dev *hdev);
+int hci_update_class_sync(struct hci_dev *hdev);
+
+int hci_update_random_address_sync(struct hci_dev *hdev, bool require_privacy,
+				   bool rpa, u8 *own_addr_type);
+
+int hci_update_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance);
+int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance);
+int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance,
+				   bool force);
+
+int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance);
+int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance);
+int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance);
+int hci_enable_advertising_sync(struct hci_dev *hdev);
+
+int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk,
+				u8 instance, bool force);
+int hci_disable_advertising_sync(struct hci_dev *hdev);
-- 
cgit v1.2.3


From e8907f76544ffe225ab95d70f7313267b1d0c76d Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 27 Oct 2021 16:58:41 -0700
Subject: Bluetooth: hci_sync: Make use of hci_cmd_sync_queue set 3

This make use of hci_cmd_sync_queue for the following MGMT commands:

    Add Device
    Remove Device

Tested with:

mgmt-tester -s "Add Device"

Test Summary
------------
Add Device - Invalid Params 1                        Passed
Add Device - Invalid Params 2                        Passed
Add Device - Invalid Params 3                        Passed
Add Device - Invalid Params 4                        Passed
Add Device - Success 1                               Passed
Add Device - Success 2                               Passed
Add Device - Success 3                               Passed
Add Device - Success 4                               Passed
Add Device - Success 5                               Passed
Add Device - Success 6 - Add to whitelist            Passed
Add Device - Success 7 - Add to resolv list          Passed
Add Device - Success 8 - Enable resolv list          Passed
Total: 12, Passed: 12 (100.0%), Failed: 0, Not Run: 0
Overall execution time: 0.209 seconds

mgmt-tester -s "Remove Device"

Test Summary
------------
Remove Device - Invalid Params 1                     Passed
Remove Device - Invalid Params 2                     Passed
Remove Device - Invalid Params 3                     Passed
Remove Device - Success 1                            Passed
Remove Device - Success 2                            Passed
Remove Device - Success 3                            Passed
Remove Device - Success 4                            Passed
Remove Device - Success 5                            Passed
Remove Device - Success 6 - All Devices              Passed
Remove Device - Success 7 - Remove from whitelist    Passed
Remove Device - Success 8 - Remove from resolv list  Passed
Total: 11, Passed: 11 (100.0%), Failed: 0, Not Run: 0
Overall execution time: 4.26 seconds

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_sync.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 143829cd1b29..b0708f900d89 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -63,3 +63,5 @@ int hci_enable_advertising_sync(struct hci_dev *hdev);
 int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk,
 				u8 instance, bool force);
 int hci_disable_advertising_sync(struct hci_dev *hdev);
+
+int hci_update_passive_scan_sync(struct hci_dev *hdev);
-- 
cgit v1.2.3


From ad383c2c65a5baf16e334cd40a013cc302176891 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 27 Oct 2021 16:58:42 -0700
Subject: Bluetooth: hci_sync: Enable advertising when LL privacy is enabled

This enables advertising when LL privacy is enabled and changes the
command sequence when resolving list is updated to also account for when
advertising is enabled using the following sequence:

If there are devices to scan:

Disable Scanning -> Update Accept List ->
use_ll_privacy((Disable Advertising) -> Disable Resolving List ->
Update Resolving List -> Enable Resolving List -> (Enable Advertising)) ->
Enable Scanning

Otherwise:

Disable Scanning

Errors during the Update Accept List stage are handled gracefully by
restoring any previous state (e.g. advertising) and disabling the use of
accept list as either accept list or resolving list could not be
updated.

Tested with:

mgmt-tester -s "LL Privacy"

Test Summary
------------
LL Privacy - Add Device 1 (Add to WL)                Passed
LL Privacy - Add Device 2 (Add to RL)                Passed
LL Privacy - Add Device 3 (Enable RL)                Passed
LL Privacy - Add Device 4 (2 Devices to WL)          Passed
LL Privacy - Add Device 5 (2 Devices to RL)          Passed
LL Privacy - Add Device 6 (RL is full)               Passed
LL Privacy - Add Device 7 (WL is full)               Passed
LL Privacy - Add Device 8 (Disable Adv)              Passed
LL Privacy - Add Device 9 (Multi Adv)                Passed
LL Privacy - Add Device 10 (Multi Dev and Multi Adv) Passed
LL Privacy - Remove Device 1 (Remove from WL)        Passed
LL Privacy - Remove Device 2 (Remove from RL)        Passed
LL Privacy - Remove Device 3 (Disable RL)            Passed
LL Privacy - Remove Device 4 (Disable Adv)           Passed
LL Privacy - Remove Device 5 (Multi Adv)             Passed
LL Privacy - Start Discovery 1 (Disable RL)          Passed
LL Privacy - Start Discovery 2 (Disable RL)          Passed
Total: 18, Passed: 18 (100.0%), Failed: 0, Not Run: 0

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 5 ++++-
 include/net/bluetooth/hci_sync.h | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 17ddea51d161..a6b075203cbe 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1465,8 +1465,11 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 #define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \
 			 ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED))
 
+#define ll_privacy_capable(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY)
+
 /* Use LL Privacy based address resolution if supported */
-#define use_ll_privacy(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY)
+#define use_ll_privacy(dev) (ll_privacy_capable(dev) && \
+			     hci_dev_test_flag(dev, HCI_ENABLE_LL_PRIVACY))
 
 /* Use enhanced synchronous connection if command is supported */
 #define enhanced_sco_capable(dev) ((dev)->commands[29] & 0x08)
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index b0708f900d89..ec727eb18e90 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -65,3 +65,4 @@ int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk,
 int hci_disable_advertising_sync(struct hci_dev *hdev);
 
 int hci_update_passive_scan_sync(struct hci_dev *hdev);
+int hci_update_passive_scan(struct hci_dev *hdev);
-- 
cgit v1.2.3


From cf75ad8b41d2aa06f98f365d42a3ae8b059daddd Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 27 Oct 2021 16:58:44 -0700
Subject: Bluetooth: hci_sync: Convert MGMT_SET_POWERED

This make use of hci_cmd_sync_queue when MGMT_SET_POWERED is used so all
commands are run within hdev->cmd_sync_work instead of
hdev->power_on_work and hdev->power_off_work.

In addition to that the power on sequence now takes into account if
local IRK needs to be programmed in the resolving list.

Tested with:

tools/mgmt-tester -s "Set powered"

Test Summary
------------
Set powered on - Success                             Passed
Set powered on - Invalid parameters 1                Passed
Set powered on - Invalid parameters 2                Passed
Set powered on - Invalid parameters 3                Passed
Set powered on - Invalid index                       Passed
Set powered on - Privacy and Advertising             Passed
Set powered off - Success                            Passed
Set powered off - Class of Device                    Passed
Set powered off - Invalid parameters 1               Passed
Set powered off - Invalid parameters 2               Passed
Set powered off - Invalid parameters 3               Passed
Total: 11, Passed: 11 (100.0%), Failed: 0, Not Run: 0

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_sync.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index ec727eb18e90..00203077e656 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -66,3 +66,9 @@ int hci_disable_advertising_sync(struct hci_dev *hdev);
 
 int hci_update_passive_scan_sync(struct hci_dev *hdev);
 int hci_update_passive_scan(struct hci_dev *hdev);
+
+int hci_dev_open_sync(struct hci_dev *hdev);
+int hci_dev_close_sync(struct hci_dev *hdev);
+
+int hci_powered_update_sync(struct hci_dev *hdev);
+int hci_set_powered_sync(struct hci_dev *hdev, u8 val);
-- 
cgit v1.2.3


From abfeea476c68afea54c9c050a2d3b19d5d2ee873 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 27 Oct 2021 16:58:45 -0700
Subject: Bluetooth: hci_sync: Convert MGMT_OP_START_DISCOVERY

This make use of hci_cmd_sync_queue for MGMT_OP_START_DISCOVERY,
MGMT_OP_START_SERVICE_DISCOVERY and MGMT_OP_STOP_DISCOVERY to use
hci_cmd_sync_queue so they no longer depend on hdev->discov_update work
to send any commands.

Tested with:

tools/mgmt-tester -s "Start Discovery"

Test Summary
------------
Start Discovery - Not powered 1                      Passed
Start Discovery - Invalid parameters 1               Passed
Start Discovery - Not supported 1                    Passed
Start Discovery - Success 1                          Passed
Start Discovery - Success 2                          Passed
Start Discovery - Power Off 1                        Passed
Start Discovery BREDR LE - (Ext Scan Enable)         Passed
Start Discovery LE - (Ext Scan Enable)               Passed
Start Discovery LE - (Ext Scan Param)                Passed
Start Discovery - (2m, Scan Param)                   Passed
Start Discovery - (coded, Scan Param)                Passed
Start Discovery - (1m, 2m, coded, Scan Param)        Passed
LL Privacy - Start Discovery 1 (Disable RL)          Passed
LL Privacy - Start Discovery 2 (Disable RL)          Passed
Total: 14, Passed: 14 (100.0%), Failed: 0, Not Run: 0

tools/mgmt-tester -s "Start Service"

Test Summary
------------
Start Service Discovery - Not powered 1              Passed
Start Service Discovery - Invalid parameters 1       Passed
Start Service Discovery - Not supported 1            Passed
Start Service Discovery - Success 1                  Passed
Start Service Discovery - Success 2                  Passed
Total: 5, Passed: 5 (100.0%), Failed: 0, Not Run: 0

tools/mgmt-tester -s "Stop Discovery"

Test Summary
------------
Stop Discovery - Success 1                           Passed
Stop Discovery - BR/EDR (Inquiry) Success 1          Passed
Stop Discovery - Rejected 1                          Passed
Stop Discovery - Invalid parameters 1                Passed
Stop Discovery - (Ext Scan Disable)                  Passed
Total: 5, Passed: 5 (100.0%), Failed: 0, Not Run: 0

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_sync.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 00203077e656..c4fa77321b31 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -59,6 +59,7 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance);
 int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance);
 int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance);
 int hci_enable_advertising_sync(struct hci_dev *hdev);
+int hci_enable_advertising(struct hci_dev *hdev);
 
 int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk,
 				u8 instance, bool force);
@@ -72,3 +73,6 @@ int hci_dev_close_sync(struct hci_dev *hdev);
 
 int hci_powered_update_sync(struct hci_dev *hdev);
 int hci_set_powered_sync(struct hci_dev *hdev, u8 val);
+
+int hci_start_discovery_sync(struct hci_dev *hdev);
+int hci_stop_discovery_sync(struct hci_dev *hdev);
-- 
cgit v1.2.3


From 353a0249c3f60365c55d53e2d068de4f43669a22 Mon Sep 17 00:00:00 2001
From: Brian Gix <brian.gix@intel.com>
Date: Wed, 27 Oct 2021 16:58:46 -0700
Subject: Bluetooth: hci_sync: Convert MGMT_OP_SET_FAST_CONNECTABLE

This creates a synchronized Write Fast Connectable call and attaches it
to the MGMT_OP_SET_FAST_CONNECTABLE management opcode.

Signed-off-by: Brian Gix <brian.gix@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_sync.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index c4fa77321b31..1fb66b6f8a34 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -68,6 +68,8 @@ int hci_disable_advertising_sync(struct hci_dev *hdev);
 int hci_update_passive_scan_sync(struct hci_dev *hdev);
 int hci_update_passive_scan(struct hci_dev *hdev);
 
+int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable);
+
 int hci_dev_open_sync(struct hci_dev *hdev);
 int hci_dev_close_sync(struct hci_dev *hdev);
 
-- 
cgit v1.2.3


From 451d95a98c5a350da2f2c5447cc17115a5b94c8e Mon Sep 17 00:00:00 2001
From: Brian Gix <brian.gix@intel.com>
Date: Wed, 27 Oct 2021 16:58:47 -0700
Subject: Bluetooth: hci_sync: Enable synch'd set_bredr

Uses previously written:
  hci_write_fast_connectable_sync
  hci_update_scan_sync
  hci_update_adv_data_sync

Signed-off-by: Brian Gix <brian.gix@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_sync.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 1fb66b6f8a34..03ffe95415fc 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -69,6 +69,7 @@ int hci_update_passive_scan_sync(struct hci_dev *hdev);
 int hci_update_passive_scan(struct hci_dev *hdev);
 
 int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable);
+int hci_update_scan_sync(struct hci_dev *hdev);
 
 int hci_dev_open_sync(struct hci_dev *hdev);
 int hci_dev_close_sync(struct hci_dev *hdev);
-- 
cgit v1.2.3


From 47db6b42991e6d5645d0938e43085aaf88cdfba4 Mon Sep 17 00:00:00 2001
From: Brian Gix <brian.gix@intel.com>
Date: Wed, 27 Oct 2021 16:58:48 -0700
Subject: Bluetooth: hci_sync: Convert MGMT_OP_GET_CONN_INFO

Synchronous version of MGMT_OP_GET_CONN_INFO

Implements:
  hci_read_rssi_sync
  hci_read_tx_power_sync

Signed-off-by: Brian Gix <brian.gix@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_sync.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 03ffe95415fc..156cb5faf64f 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -67,6 +67,8 @@ int hci_disable_advertising_sync(struct hci_dev *hdev);
 
 int hci_update_passive_scan_sync(struct hci_dev *hdev);
 int hci_update_passive_scan(struct hci_dev *hdev);
+int hci_read_rssi_sync(struct hci_dev *hdev, __le16 handle);
+int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type);
 
 int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable);
 int hci_update_scan_sync(struct hci_dev *hdev);
-- 
cgit v1.2.3


From 2f2eb0c9de2eb69969aaf04feffb69310d3804b2 Mon Sep 17 00:00:00 2001
From: Brian Gix <brian.gix@intel.com>
Date: Wed, 27 Oct 2021 16:58:49 -0700
Subject: Bluetooth: hci_sync: Convert MGMT_OP_SET_SECURE_CONN

Synchronous version of MGMT_OP_SET_SECURE_CONN.

Signed-off-by: Brian Gix <brian.gix@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_sync.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 156cb5faf64f..4b27a89cc57e 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -69,6 +69,7 @@ int hci_update_passive_scan_sync(struct hci_dev *hdev);
 int hci_update_passive_scan(struct hci_dev *hdev);
 int hci_read_rssi_sync(struct hci_dev *hdev, __le16 handle);
 int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type);
+int hci_write_sc_support_sync(struct hci_dev *hdev, u8 val);
 
 int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable);
 int hci_update_scan_sync(struct hci_dev *hdev);
-- 
cgit v1.2.3


From 5a750137466400276560458db040c143cbc254ed Mon Sep 17 00:00:00 2001
From: Brian Gix <brian.gix@intel.com>
Date: Wed, 27 Oct 2021 16:58:50 -0700
Subject: Bluetooth: hci_sync: Convert MGMT_OP_GET_CLOCK_INFO

Synchronous version of MGMT_OP_GET_CLOCK_INFO.

Signed-off-by: Brian Gix <brian.gix@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_sync.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 4b27a89cc57e..cf54f8f14edb 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -70,6 +70,7 @@ int hci_update_passive_scan(struct hci_dev *hdev);
 int hci_read_rssi_sync(struct hci_dev *hdev, __le16 handle);
 int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type);
 int hci_write_sc_support_sync(struct hci_dev *hdev, u8 val);
+int hci_read_clock_sync(struct hci_dev *hdev, struct hci_cp_read_clock *cp);
 
 int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable);
 int hci_update_scan_sync(struct hci_dev *hdev);
-- 
cgit v1.2.3


From d81a494c43df66f053f7d1ec612e057eb99f34d4 Mon Sep 17 00:00:00 2001
From: Brian Gix <brian.gix@intel.com>
Date: Wed, 27 Oct 2021 16:58:51 -0700
Subject: Bluetooth: hci_sync: Convert MGMT_OP_SET_LE

Uses:
  hci_disable_advertising_sync
  hci_remove_ext_adv_instance_sync
  hci_write_le_host_supported_sync
  hci_setup_ext_adv_instance_sync
  hci_update_scan_rsp_data_sync

Signed-off-by: Brian Gix <brian.gix@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_sync.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index cf54f8f14edb..d969693c33b5 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -75,6 +75,10 @@ int hci_read_clock_sync(struct hci_dev *hdev, struct hci_cp_read_clock *cp);
 int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable);
 int hci_update_scan_sync(struct hci_dev *hdev);
 
+int hci_write_le_host_supported_sync(struct hci_dev *hdev, u8 le, u8 simul);
+int hci_remove_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance,
+				     struct sock *sk);
+
 int hci_dev_open_sync(struct hci_dev *hdev);
 int hci_dev_close_sync(struct hci_dev *hdev);
 
-- 
cgit v1.2.3


From f892244b05bf6a99e48db14f8bbd96db16bcfa69 Mon Sep 17 00:00:00 2001
From: Brian Gix <brian.gix@intel.com>
Date: Wed, 27 Oct 2021 16:58:52 -0700
Subject: Bluetooth: hci_sync: Convert MGMT_OP_READ_LOCAL_OOB_DATA

New functions:
  hci_read_local_oob_data_sync

This function requires all of the data from the cmd cmplt event
to be passed up to the caller via the skb.

mgmt-tester paths:
Read Local OOB Data - Not powered
Read Local OOB Data - Legacy pairing
Read Local OOB Data - Success SSP
Read Local OOB Data - Success SC

Signed-off-by: Brian Gix <brian.gix@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_sync.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index d969693c33b5..a381621a56a1 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -78,6 +78,8 @@ int hci_update_scan_sync(struct hci_dev *hdev);
 int hci_write_le_host_supported_sync(struct hci_dev *hdev, u8 le, u8 simul);
 int hci_remove_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance,
 				     struct sock *sk);
+struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, bool ext,
+					     struct sock *sk);
 
 int hci_dev_open_sync(struct hci_dev *hdev);
 int hci_dev_close_sync(struct hci_dev *hdev);
-- 
cgit v1.2.3


From 6f6ff38a1e140d7ca3a733556b0027eec1fe4ef1 Mon Sep 17 00:00:00 2001
From: Brian Gix <brian.gix@intel.com>
Date: Wed, 27 Oct 2021 16:58:54 -0700
Subject: Bluetooth: hci_sync: Convert MGMT_OP_SET_LOCAL_NAME

Uses existing *_sync functions, but made hci_update_name_sync
non-static.

mgmt-test paths:
Set Advertising on - Local name 1
Set Advertising on - Name + Appear 1
Set Local Name - Success 1
Set Local Name - Success 2
Set Local Name - Success 3
Add Advertising - Success (Empty ScRsp)
Add Advertising - Success (Complete name)
Add Advertising - Success (Shortened name)
Add Advertising - Success (Short name)
Add Advertising - Success (Name + data)
Add Advertising - Invalid Params (Name + data)
Add Advertising - Success (Name+data+appear)
Read Ext Controller Info 3
Read Ext Controller Info 4
Read Ext Controller Info 5
Add Ext Advertising - Success (Empty ScRsp)
Add Ext Advertising - Success (Complete name)
Add Ext Advertising - Success (Shortened name)
Add Ext Advertising - Success (Short name)
Add Ext Advertising - Success (Name + data)
Add Ext Advertising - Invalid Params (Name + data)
Add Ext Advertising - Success (Name+data+appear)

Signed-off-by: Brian Gix <brian.gix@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_sync.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index a381621a56a1..d9f2e3182ed8 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -46,6 +46,7 @@ int hci_update_class_sync(struct hci_dev *hdev);
 
 int hci_update_eir_sync(struct hci_dev *hdev);
 int hci_update_class_sync(struct hci_dev *hdev);
+int hci_update_name_sync(struct hci_dev *hdev);
 
 int hci_update_random_address_sync(struct hci_dev *hdev, bool require_privacy,
 				   bool rpa, u8 *own_addr_type);
-- 
cgit v1.2.3


From 3244845c6307fa6f4fa2eabe5259d2c93c837dce Mon Sep 17 00:00:00 2001
From: Brian Gix <brian.gix@intel.com>
Date: Wed, 27 Oct 2021 16:58:58 -0700
Subject: Bluetooth: hci_sync: Convert MGMT_OP_SSP

mgmt-tester paths:
Set SSP on - Success 2
Set Device ID - SSP off and Power on

Signed-off-by: Brian Gix <brian.gix@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 1 -
 include/net/bluetooth/hci_sync.h | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index a6b075203cbe..3e53c845ab0e 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1806,7 +1806,6 @@ int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr,
 			     u8 entered);
 void mgmt_auth_failed(struct hci_conn *conn, u8 status);
 void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status);
-void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status);
 void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
 				    u8 status);
 void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status);
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index d9f2e3182ed8..db96546d40c8 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -47,6 +47,7 @@ int hci_update_class_sync(struct hci_dev *hdev);
 int hci_update_eir_sync(struct hci_dev *hdev);
 int hci_update_class_sync(struct hci_dev *hdev);
 int hci_update_name_sync(struct hci_dev *hdev);
+int hci_write_ssp_mode_sync(struct hci_dev *hdev, u8 mode);
 
 int hci_update_random_address_sync(struct hci_dev *hdev, bool require_privacy,
 				   bool rpa, u8 *own_addr_type);
-- 
cgit v1.2.3


From d0b137062b2de75b264b84143d21c98abc5f5ad2 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 27 Oct 2021 16:58:59 -0700
Subject: Bluetooth: hci_sync: Rework init stages

This moves the init stages to use the hci_sync infra and in addition
to that have the stages as function tables so it is easier to change
the command sequence.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_sync.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index db96546d40c8..487e4981cce1 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -83,6 +83,7 @@ int hci_remove_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance,
 struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, bool ext,
 					     struct sock *sk);
 
+int hci_reset_sync(struct hci_dev *hdev);
 int hci_dev_open_sync(struct hci_dev *hdev);
 int hci_dev_close_sync(struct hci_dev *hdev);
 
-- 
cgit v1.2.3


From 182ee45da083db4e3e621541ccf255bfa9652214 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 27 Oct 2021 16:59:00 -0700
Subject: Bluetooth: hci_sync: Rework hci_suspend_notifier

This makes hci_suspend_notifier use the hci_*_sync which can be
executed synchronously which is allowed in the suspend_notifier and
simplifies a lot of the handling since the status of each command can
be checked inline so no other work need to be scheduled thus can be
performed without using of a state machine.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 4 ----
 include/net/bluetooth/hci_sync.h | 3 +++
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 3e53c845ab0e..53a8c7d3a4bf 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -523,7 +523,6 @@ struct hci_dev {
 	bool			advertising_paused;
 
 	struct notifier_block	suspend_notifier;
-	struct work_struct	suspend_prepare;
 	enum suspended_state	suspend_state_next;
 	enum suspended_state	suspend_state;
 	bool			scanning_paused;
@@ -532,9 +531,6 @@ struct hci_dev {
 	bdaddr_t		wake_addr;
 	u8			wake_addr_type;
 
-	wait_queue_head_t	suspend_wait_q;
-	DECLARE_BITMAP(suspend_tasks, __SUSPEND_NUM_TASKS);
-
 	struct hci_conn_hash	conn_hash;
 
 	struct list_head	mgmt_pending;
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 487e4981cce1..00b13e8ca800 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -92,3 +92,6 @@ int hci_set_powered_sync(struct hci_dev *hdev, u8 val);
 
 int hci_start_discovery_sync(struct hci_dev *hdev);
 int hci_stop_discovery_sync(struct hci_dev *hdev);
+
+int hci_suspend_sync(struct hci_dev *hdev);
+int hci_resume_sync(struct hci_dev *hdev);
-- 
cgit v1.2.3


From 542cff7893a37445f98ece26aeb3c9c1055e9ea4 Mon Sep 17 00:00:00 2001
From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Date: Thu, 28 Oct 2021 12:24:03 -0400
Subject: drm/sched: Avoid lockdep spalt on killing a processes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Probelm:
Singlaning one sched fence from within another's sched
fence singal callback generates lockdep splat because
the both have same lockdep class of their fence->lock

Fix:
Fix bellow stack by rescheduling to irq work of
signaling and killing of jobs that left when entity is killed.

[11176.741181]  dump_stack+0x10/0x12
[11176.741186] __lock_acquire.cold+0x208/0x2df
[11176.741197]  lock_acquire+0xc6/0x2d0
[11176.741204]  ? dma_fence_signal+0x28/0x80
[11176.741212] _raw_spin_lock_irqsave+0x4d/0x70
[11176.741219]  ? dma_fence_signal+0x28/0x80
[11176.741225]  dma_fence_signal+0x28/0x80
[11176.741230] drm_sched_fence_finished+0x12/0x20 [gpu_sched]
[11176.741240] drm_sched_entity_kill_jobs_cb+0x1c/0x50 [gpu_sched]
[11176.741248] dma_fence_signal_timestamp_locked+0xac/0x1a0
[11176.741254]  dma_fence_signal+0x3b/0x80
[11176.741260] drm_sched_fence_finished+0x12/0x20 [gpu_sched]
[11176.741268] drm_sched_job_done.isra.0+0x7f/0x1a0 [gpu_sched]
[11176.741277] drm_sched_job_done_cb+0x12/0x20 [gpu_sched]
[11176.741284] dma_fence_signal_timestamp_locked+0xac/0x1a0
[11176.741290]  dma_fence_signal+0x3b/0x80
[11176.741296] amdgpu_fence_process+0xd1/0x140 [amdgpu]
[11176.741504] sdma_v4_0_process_trap_irq+0x8c/0xb0 [amdgpu]
[11176.741731]  amdgpu_irq_dispatch+0xce/0x250 [amdgpu]
[11176.741954]  amdgpu_ih_process+0x81/0x100 [amdgpu]
[11176.742174]  amdgpu_irq_handler+0x26/0xa0 [amdgpu]
[11176.742393] __handle_irq_event_percpu+0x4f/0x2c0
[11176.742402] handle_irq_event_percpu+0x33/0x80
[11176.742408]  handle_irq_event+0x39/0x60
[11176.742414]  handle_edge_irq+0x93/0x1d0
[11176.742419]  __common_interrupt+0x50/0xe0
[11176.742426]  common_interrupt+0x80/0x90

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Suggested-by: Daniel Vetter  <daniel.vetter@ffwll.ch>
Suggested-by: Christian König <christian.koenig@amd.com>
Tested-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://www.spinics.net/lists/dri-devel/msg321250.html
---
 include/drm/gpu_scheduler.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index f011e4c407f2..bbc22fad8d80 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -28,6 +28,7 @@
 #include <linux/dma-fence.h>
 #include <linux/completion.h>
 #include <linux/xarray.h>
+#include <linux/irq_work.h>
 
 #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
 
@@ -286,7 +287,16 @@ struct drm_sched_job {
 	struct list_head		list;
 	struct drm_gpu_scheduler	*sched;
 	struct drm_sched_fence		*s_fence;
-	struct dma_fence_cb		finish_cb;
+
+	/*
+	 * work is used only after finish_cb has been used and will not be
+	 * accessed anymore.
+	 */
+	union {
+		struct dma_fence_cb		finish_cb;
+		struct irq_work 		work;
+	};
+
 	uint64_t			id;
 	atomic_t			karma;
 	enum drm_sched_priority		s_priority;
-- 
cgit v1.2.3


From 0d6a8c5e96833f644b91528de6a3a4398214fb9c Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Mon, 18 Oct 2021 08:47:25 +0000
Subject: drm/sysfs: introduce drm_sysfs_connector_hotplug_event

This function sends a hotplug uevent with a CONNECTOR property.

Signed-off-by: Simon Ser <contact@emersion.fr>
Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
Acked-by: Harry Wentland <harry.wentland@amd.com>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211018084707.32253-2-contact@emersion.fr
---
 include/drm/drm_sysfs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/drm/drm_sysfs.h b/include/drm/drm_sysfs.h
index d454ef617b2c..6273cac44e47 100644
--- a/include/drm/drm_sysfs.h
+++ b/include/drm/drm_sysfs.h
@@ -11,6 +11,7 @@ int drm_class_device_register(struct device *dev);
 void drm_class_device_unregister(struct device *dev);
 
 void drm_sysfs_hotplug_event(struct drm_device *dev);
+void drm_sysfs_connector_hotplug_event(struct drm_connector *connector);
 void drm_sysfs_connector_status_event(struct drm_connector *connector,
 				      struct drm_property *property);
 #endif
-- 
cgit v1.2.3


From 710074bb8ab0efac425a43473b8a3e057d645f82 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Mon, 18 Oct 2021 08:47:26 +0000
Subject: drm/probe-helper: add drm_kms_helper_connector_hotplug_event

This function is the same as drm_kms_helper_hotplug_event, but takes
a connector instead of a device.

Signed-off-by: Simon Ser <contact@emersion.fr>
Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
Acked-by: Harry Wentland <harry.wentland@amd.com>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211018084707.32253-3-contact@emersion.fr
---
 include/drm/drm_probe_helper.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/drm/drm_probe_helper.h b/include/drm/drm_probe_helper.h
index 04c57564c397..48300aa6ca71 100644
--- a/include/drm/drm_probe_helper.h
+++ b/include/drm/drm_probe_helper.h
@@ -20,6 +20,7 @@ void drm_kms_helper_poll_fini(struct drm_device *dev);
 bool drm_helper_hpd_irq_event(struct drm_device *dev);
 bool drm_connector_helper_hpd_irq_event(struct drm_connector *connector);
 void drm_kms_helper_hotplug_event(struct drm_device *dev);
+void drm_kms_helper_connector_hotplug_event(struct drm_connector *connector);
 
 void drm_kms_helper_poll_disable(struct drm_device *dev);
 void drm_kms_helper_poll_enable(struct drm_device *dev);
-- 
cgit v1.2.3


From 749a6c594203b0e9ac59e3d8da492a8ac6a80510 Mon Sep 17 00:00:00 2001
From: Joseph Hwang <josephsih@chromium.org>
Date: Tue, 2 Nov 2021 15:19:28 +0800
Subject: Bluetooth: Add struct of reading AOSP vendor capabilities

This patch adds the struct of reading AOSP vendor capabilities.
New capabilities are added incrementally. Note that the
version_supported octets will be used to determine whether a
capability has been defined for the version.

Signed-off-by: Joseph Hwang <josephsih@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 53a8c7d3a4bf..b5f061882c10 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -603,6 +603,7 @@ struct hci_dev {
 
 #if IS_ENABLED(CONFIG_BT_AOSPEXT)
 	bool			aosp_capable;
+	bool			aosp_quality_report;
 #endif
 
 	int (*open)(struct hci_dev *hdev);
-- 
cgit v1.2.3


From f1d3b8f91d965c4fd900ac5dd06240cc9df0c7a7 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:15 -0400
Subject: fs: dlm: initial support for tracepoints

This patch adds initial support for dlm tracepoints. It will introduce
tracepoints to dlm main functionality dlm_lock()/dlm_unlock() and their
complete ast() callback or blocking bast() callback.

The lock/unlock functionality has a start and end tracepoint, this is
because there exists a race in case if would have a tracepoint at the
end position only the complete/blocking callbacks could occur before. To
work with eBPF tracing and using their lookup hash functionality there
could be problems that an entry was not inserted yet. However use the
start functionality for hash insert and check again in end functionality
if there was an dlm internal error so there is no ast callback. In further
it might also that locks with local masters will occur those callbacks
immediately so we must have such functionality.

I did not make everything accessible yet, although it seems eBPF can be
used to access a lot of internal datastructures if it's aware of the
struct definitions of the running kernel instance. We still can change
it, if you do eBPF experiments e.g. time measurements between lock and
callback functionality you can simple use the local lkb_id field as hash
value in combination with the lockspace id if you have multiple
lockspaces. Otherwise you can simple use trace-cmd for some functionality,
e.g. `trace-cmd record -e dlm` and `trace-cmd report` afterwards.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 include/trace/events/dlm.h | 220 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 220 insertions(+)
 create mode 100644 include/trace/events/dlm.h

(limited to 'include')

diff --git a/include/trace/events/dlm.h b/include/trace/events/dlm.h
new file mode 100644
index 000000000000..c97b4c163c3e
--- /dev/null
+++ b/include/trace/events/dlm.h
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM dlm
+
+#if !defined(_TRACE_DLM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_DLM_H
+
+#include <linux/dlm.h>
+#include <linux/dlmconstants.h>
+#include <linux/tracepoint.h>
+
+#include "../../../fs/dlm/dlm_internal.h"
+
+#define show_lock_flags(flags) __print_flags(flags, "|",	\
+	{ DLM_LKF_NOQUEUE,	"NOQUEUE" },			\
+	{ DLM_LKF_CANCEL,	"CANCEL" },			\
+	{ DLM_LKF_CONVERT,	"CONVERT" },			\
+	{ DLM_LKF_VALBLK,	"VALBLK" },			\
+	{ DLM_LKF_QUECVT,	"QUECVT" },			\
+	{ DLM_LKF_IVVALBLK,	"IVVALBLK" },			\
+	{ DLM_LKF_CONVDEADLK,	"CONVDEADLK" },			\
+	{ DLM_LKF_PERSISTENT,	"PERSISTENT" },			\
+	{ DLM_LKF_NODLCKWT,	"NODLCKWT" },			\
+	{ DLM_LKF_NODLCKBLK,	"NODLCKBLK" },			\
+	{ DLM_LKF_EXPEDITE,	"EXPEDITE" },			\
+	{ DLM_LKF_NOQUEUEBAST,	"NOQUEUEBAST" },		\
+	{ DLM_LKF_HEADQUE,	"HEADQUE" },			\
+	{ DLM_LKF_NOORDER,	"NOORDER" },			\
+	{ DLM_LKF_ORPHAN,	"ORPHAN" },			\
+	{ DLM_LKF_ALTPR,	"ALTPR" },			\
+	{ DLM_LKF_ALTCW,	"ALTCW" },			\
+	{ DLM_LKF_FORCEUNLOCK,	"FORCEUNLOCK" },		\
+	{ DLM_LKF_TIMEOUT,	"TIMEOUT" })
+
+#define show_lock_mode(mode) __print_symbolic(mode,		\
+	{ DLM_LOCK_IV,		"IV"},				\
+	{ DLM_LOCK_NL,		"NL"},				\
+	{ DLM_LOCK_CR,		"CR"},				\
+	{ DLM_LOCK_CW,		"CW"},				\
+	{ DLM_LOCK_PR,		"PR"},				\
+	{ DLM_LOCK_PW,		"PW"},				\
+	{ DLM_LOCK_EX,		"EX"})
+
+#define show_dlm_sb_flags(flags) __print_flags(flags, "|",	\
+	{ DLM_SBF_DEMOTED,	"DEMOTED" },			\
+	{ DLM_SBF_VALNOTVALID,	"VALNOTVALID" },		\
+	{ DLM_SBF_ALTMODE,	"ALTMODE" })
+
+/* note: we begin tracing dlm_lock_start() only if ls and lkb are found */
+TRACE_EVENT(dlm_lock_start,
+
+	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode,
+		 __u32 flags),
+
+	TP_ARGS(ls, lkb, mode, flags),
+
+	TP_STRUCT__entry(
+		__field(__u32, ls_id)
+		__field(__u32, lkb_id)
+		__field(int, mode)
+		__field(__u32, flags)
+	),
+
+	TP_fast_assign(
+		__entry->ls_id = ls->ls_global_id;
+		__entry->lkb_id = lkb->lkb_id;
+		__entry->mode = mode;
+		__entry->flags = flags;
+	),
+
+	TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s",
+		  __entry->ls_id, __entry->lkb_id,
+		  show_lock_mode(__entry->mode),
+		  show_lock_flags(__entry->flags))
+
+);
+
+TRACE_EVENT(dlm_lock_end,
+
+	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode, __u32 flags,
+		 int error),
+
+	TP_ARGS(ls, lkb, mode, flags, error),
+
+	TP_STRUCT__entry(
+		__field(__u32, ls_id)
+		__field(__u32, lkb_id)
+		__field(int, mode)
+		__field(__u32, flags)
+		__field(int, error)
+	),
+
+	TP_fast_assign(
+		__entry->ls_id = ls->ls_global_id;
+		__entry->lkb_id = lkb->lkb_id;
+		__entry->mode = mode;
+		__entry->flags = flags;
+
+		/* return value will be zeroed in those cases by dlm_lock()
+		 * we do it here again to not introduce more overhead if
+		 * trace isn't running and error reflects the return value.
+		 */
+		if (error == -EAGAIN || error == -EDEADLK)
+			__entry->error = 0;
+		else
+			__entry->error = error;
+	),
+
+	TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s error=%d",
+		  __entry->ls_id, __entry->lkb_id,
+		  show_lock_mode(__entry->mode),
+		  show_lock_flags(__entry->flags), __entry->error)
+
+);
+
+TRACE_EVENT(dlm_bast,
+
+	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode),
+
+	TP_ARGS(ls, lkb, mode),
+
+	TP_STRUCT__entry(
+		__field(__u32, ls_id)
+		__field(__u32, lkb_id)
+		__field(int, mode)
+	),
+
+	TP_fast_assign(
+		__entry->ls_id = ls->ls_global_id;
+		__entry->lkb_id = lkb->lkb_id;
+		__entry->mode = mode;
+	),
+
+	TP_printk("ls_id=%u lkb_id=%x mode=%s", __entry->ls_id,
+		  __entry->lkb_id, show_lock_mode(__entry->mode))
+
+);
+
+TRACE_EVENT(dlm_ast,
+
+	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_lksb *lksb),
+
+	TP_ARGS(ls, lkb, lksb),
+
+	TP_STRUCT__entry(
+		__field(__u32, ls_id)
+		__field(__u32, lkb_id)
+		__field(u8, sb_flags)
+		__field(int, sb_status)
+	),
+
+	TP_fast_assign(
+		__entry->ls_id = ls->ls_global_id;
+		__entry->lkb_id = lkb->lkb_id;
+		__entry->sb_flags = lksb->sb_flags;
+		__entry->sb_status = lksb->sb_status;
+	),
+
+	TP_printk("ls_id=%u lkb_id=%x sb_flags=%s sb_status=%d",
+		  __entry->ls_id, __entry->lkb_id,
+		  show_dlm_sb_flags(__entry->sb_flags), __entry->sb_status)
+
+);
+
+/* note: we begin tracing dlm_unlock_start() only if ls and lkb are found */
+TRACE_EVENT(dlm_unlock_start,
+
+	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, __u32 flags),
+
+	TP_ARGS(ls, lkb, flags),
+
+	TP_STRUCT__entry(
+		__field(__u32, ls_id)
+		__field(__u32, lkb_id)
+		__field(__u32, flags)
+	),
+
+	TP_fast_assign(
+		__entry->ls_id = ls->ls_global_id;
+		__entry->lkb_id = lkb->lkb_id;
+		__entry->flags = flags;
+	),
+
+	TP_printk("ls_id=%u lkb_id=%x flags=%s",
+		  __entry->ls_id, __entry->lkb_id,
+		  show_lock_flags(__entry->flags))
+
+);
+
+TRACE_EVENT(dlm_unlock_end,
+
+	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, __u32 flags,
+		 int error),
+
+	TP_ARGS(ls, lkb, flags, error),
+
+	TP_STRUCT__entry(
+		__field(__u32, ls_id)
+		__field(__u32, lkb_id)
+		__field(__u32, flags)
+		__field(int, error)
+	),
+
+	TP_fast_assign(
+		__entry->ls_id = ls->ls_global_id;
+		__entry->lkb_id = lkb->lkb_id;
+		__entry->flags = flags;
+		__entry->error = error;
+	),
+
+	TP_printk("ls_id=%u lkb_id=%x flags=%s error=%d",
+		  __entry->ls_id, __entry->lkb_id,
+		  show_lock_flags(__entry->flags), __entry->error)
+
+);
+
+#endif /* if !defined(_TRACE_DLM_H) || defined(TRACE_HEADER_MULTI_READ) */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
cgit v1.2.3


From 92732376fd29462b502f41486bcef55f49c5713e Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:16 -0400
Subject: fs: dlm: trace socket handling

This patch adds tracepoints for dlm socket receive and send
functionality. We can use it to track how much data was send or received
to or from a specific nodeid.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 include/trace/events/dlm.h | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/dlm.h b/include/trace/events/dlm.h
index c97b4c163c3e..32088c603244 100644
--- a/include/trace/events/dlm.h
+++ b/include/trace/events/dlm.h
@@ -214,6 +214,46 @@ TRACE_EVENT(dlm_unlock_end,
 
 );
 
+TRACE_EVENT(dlm_send,
+
+	TP_PROTO(int nodeid, int ret),
+
+	TP_ARGS(nodeid, ret),
+
+	TP_STRUCT__entry(
+		__field(int, nodeid)
+		__field(int, ret)
+	),
+
+	TP_fast_assign(
+		__entry->nodeid = nodeid;
+		__entry->ret = ret;
+	),
+
+	TP_printk("nodeid=%d ret=%d", __entry->nodeid, __entry->ret)
+
+);
+
+TRACE_EVENT(dlm_recv,
+
+	TP_PROTO(int nodeid, int ret),
+
+	TP_ARGS(nodeid, ret),
+
+	TP_STRUCT__entry(
+		__field(int, nodeid)
+		__field(int, ret)
+	),
+
+	TP_fast_assign(
+		__entry->nodeid = nodeid;
+		__entry->ret = ret;
+	),
+
+	TP_printk("nodeid=%d ret=%d", __entry->nodeid, __entry->ret)
+
+);
+
 #endif /* if !defined(_TRACE_DLM_H) || defined(TRACE_HEADER_MULTI_READ) */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 7c7e3d31e7856a8260a254f8c71db416f7f9f5a1 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 5 Nov 2021 16:23:29 -0700
Subject: bpf: Introduce helper bpf_find_vma

In some profiler use cases, it is necessary to map an address to the
backing file, e.g., a shared library. bpf_find_vma helper provides a
flexible way to achieve this. bpf_find_vma maps an address of a task to
the vma (vm_area_struct) for this address, and feed the vma to an callback
BPF function. The callback function is necessary here, as we need to
ensure mmap_sem is unlocked.

It is necessary to lock mmap_sem for find_vma. To lock and unlock mmap_sem
safely when irqs are disable, we use the same mechanism as stackmap with
build_id. Specifically, when irqs are disabled, the unlocked is postponed
in an irq_work. Refactor stackmap.c so that the irq_work is shared among
bpf_find_vma and stackmap helpers.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Hengqi Chen <hengqi.chen@gmail.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20211105232330.1936330-2-songliubraving@fb.com
---
 include/linux/bpf.h      |  1 +
 include/uapi/linux/bpf.h | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2be6dfd68df9..df3410bff4b0 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2157,6 +2157,7 @@ extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
 extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
 extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
 extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
+extern const struct bpf_func_proto bpf_find_vma_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
   enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ba5af15e25f5..509eee5f0393 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4938,6 +4938,25 @@ union bpf_attr {
  *		**-ENOENT** if symbol is not found.
  *
  *		**-EPERM** if caller does not have permission to obtain kernel address.
+ *
+ * long bpf_find_vma(struct task_struct *task, u64 addr, void *callback_fn, void *callback_ctx, u64 flags)
+ *	Description
+ *		Find vma of *task* that contains *addr*, call *callback_fn*
+ *		function with *task*, *vma*, and *callback_ctx*.
+ *		The *callback_fn* should be a static function and
+ *		the *callback_ctx* should be a pointer to the stack.
+ *		The *flags* is used to control certain aspects of the helper.
+ *		Currently, the *flags* must be 0.
+ *
+ *		The expected callback signature is
+ *
+ *		long (\*callback_fn)(struct task_struct \*task, struct vm_area_struct \*vma, void \*callback_ctx);
+ *
+ *	Return
+ *		0 on success.
+ *		**-ENOENT** if *task->mm* is NULL, or no vma contains *addr*.
+ *		**-EBUSY** if failed to try lock mmap_lock.
+ *		**-EINVAL** for invalid **flags**.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5120,6 +5139,7 @@ union bpf_attr {
 	FN(trace_vprintk),		\
 	FN(skc_to_unix_sock),		\
 	FN(kallsyms_lookup_name),	\
+	FN(find_vma),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From fa443bc3c1e4b28d9315dea882e8358ba6e26f8b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Fri, 29 Oct 2021 17:28:56 +0200
Subject: HID: intel-ish-hid: add support for MODULE_DEVICE_TABLE()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows to selectively autoload drivers for ISH devices.
Currently all ISH drivers are loaded for all systems having any ISH
device.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/mod_devicetable.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index ae2e75d15b21..befbf53c4b7c 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -895,4 +895,17 @@ struct dfl_device_id {
 	kernel_ulong_t driver_data;
 };
 
+/* ISHTP (Integrated Sensor Hub Transport Protocol) */
+
+#define ISHTP_MODULE_PREFIX	"ishtp:"
+
+/**
+ * struct ishtp_device_id - ISHTP device identifier
+ * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
+ * @context: pointer to driver specific data
+ */
+struct ishtp_device_id {
+	guid_t guid;
+};
+
 #endif /* LINUX_MOD_DEVICETABLE_H */
-- 
cgit v1.2.3


From 64355db3caf6468dc711995239efe0cbcd7d0091 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Wed, 10 Nov 2021 13:16:55 +0100
Subject: mod_devicetable: fix kdocs for ishtp_device_id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The kdocs were copied from another device_id struct and not adapted.

Fixes: fa443bc3c1e4 ("HID: intel-ish-hid: add support for MODULE_DEVICE_TABLE()")
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/mod_devicetable.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index befbf53c4b7c..c70abe7aaef2 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -901,8 +901,7 @@ struct dfl_device_id {
 
 /**
  * struct ishtp_device_id - ISHTP device identifier
- * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
- * @context: pointer to driver specific data
+ * @guid: GUID of the device.
  */
 struct ishtp_device_id {
 	guid_t guid;
-- 
cgit v1.2.3


From 5d5e4522a7f404d1a96fd6c703989d32a9c9568d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 7 Nov 2021 14:51:16 +1000
Subject: printk: restore flushing of NMI buffers on remote CPUs after NMI
 backtraces

printk from NMI context relies on irq work being raised on the local CPU
to print to console. This can be a problem if the NMI was raised by a
lockup detector to print lockup stack and regs, because the CPU may not
enable irqs (because it is locked up).

Introduce printk_trigger_flush() that can be called another CPU to try
to get those messages to the console, call that where printk_safe_flush
was previously called.

Fixes: 93d102f094be ("printk: remove safe buffers")
Cc: stable@vger.kernel.org # 5.15
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Reviewed-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20211107045116.1754411-1-npiggin@gmail.com
---
 include/linux/printk.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index a1379df43251..596ad6fa0336 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -206,6 +206,7 @@ void dump_stack_print_info(const char *log_lvl);
 void show_regs_print_info(const char *log_lvl);
 extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold;
 extern asmlinkage void dump_stack(void) __cold;
+void printk_trigger_flush(void);
 #else
 static inline __printf(1, 0)
 int vprintk(const char *s, va_list args)
@@ -282,6 +283,9 @@ static inline void dump_stack_lvl(const char *log_lvl)
 static inline void dump_stack(void)
 {
 }
+static inline void printk_trigger_flush(void)
+{
+}
 #endif
 
 #ifdef CONFIG_SMP
-- 
cgit v1.2.3


From f89315650ba34ec6c91a8bded72796980bee2a4d Mon Sep 17 00:00:00 2001
From: Mark Pashmfouroush <markpash@cloudflare.com>
Date: Wed, 10 Nov 2021 11:10:15 +0000
Subject: bpf: Add ingress_ifindex to bpf_sk_lookup

It may be helpful to have access to the ifindex during bpf socket
lookup. An example may be to scope certain socket lookup logic to
specific interfaces, i.e. an interface may be made exempt from custom
lookup code.

Add the ifindex of the arriving connection to the bpf_sk_lookup API.

Signed-off-by: Mark Pashmfouroush <markpash@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211110111016.5670-2-markpash@cloudflare.com
---
 include/linux/filter.h   | 7 +++++--
 include/uapi/linux/bpf.h | 1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 24b7ed2677af..b6a216eb217a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1374,6 +1374,7 @@ struct bpf_sk_lookup_kern {
 		const struct in6_addr *daddr;
 	} v6;
 	struct sock	*selected_sk;
+	u32		ingress_ifindex;
 	bool		no_reuseport;
 };
 
@@ -1436,7 +1437,7 @@ extern struct static_key_false bpf_sk_lookup_enabled;
 static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
 					const __be32 saddr, const __be16 sport,
 					const __be32 daddr, const u16 dport,
-					struct sock **psk)
+					const int ifindex, struct sock **psk)
 {
 	struct bpf_prog_array *run_array;
 	struct sock *selected_sk = NULL;
@@ -1452,6 +1453,7 @@ static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
 			.v4.daddr	= daddr,
 			.sport		= sport,
 			.dport		= dport,
+			.ingress_ifindex	= ifindex,
 		};
 		u32 act;
 
@@ -1474,7 +1476,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
 					const __be16 sport,
 					const struct in6_addr *daddr,
 					const u16 dport,
-					struct sock **psk)
+					const int ifindex, struct sock **psk)
 {
 	struct bpf_prog_array *run_array;
 	struct sock *selected_sk = NULL;
@@ -1490,6 +1492,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
 			.v6.daddr	= daddr,
 			.sport		= sport,
 			.dport		= dport,
+			.ingress_ifindex	= ifindex,
 		};
 		u32 act;
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 509eee5f0393..6297eafdc40f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6316,6 +6316,7 @@ struct bpf_sk_lookup {
 	__u32 local_ip4;	/* Network byte order */
 	__u32 local_ip6[4];	/* Network byte order */
 	__u32 local_port;	/* Host byte order */
+	__u32 ingress_ifindex;		/* The arriving interface. Determined by inet_iif. */
 };
 
 /*
-- 
cgit v1.2.3


From a25efb3863d068929f0bbeb87a995df11507e691 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Thu, 23 Sep 2021 13:57:42 +0200
Subject: dma-buf: add dma_fence_describe and dma_resv_describe v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add functions to dump dma_fence and dma_resv objects into a seq_file and
use them for printing the debugfs information.

v2: fix missing include reported by test robot.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Rob Clark <robdclark@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211103081231.18578-2-christian.koenig@amd.com
---
 include/linux/dma-fence.h | 1 +
 include/linux/dma-resv.h  | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index a706b7bf51d7..1ea691753bd3 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -264,6 +264,7 @@ void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
 
 void dma_fence_release(struct kref *kref);
 void dma_fence_free(struct dma_fence *fence);
+void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq);
 
 /**
  * dma_fence_put - decreases refcount of the fence
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index dbd235ab447f..09c6063b199a 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -490,5 +490,6 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src);
 long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr,
 			   unsigned long timeout);
 bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all);
+void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq);
 
 #endif /* _LINUX_RESERVATION_H */
-- 
cgit v1.2.3


From 396d9b9a48723995416a05f0dab80c1dd6c21e80 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 8 Nov 2021 11:28:46 +0100
Subject: drm: Update documentation and TODO of gem_prime_mmap hook

The hook gem_prime_mmap in struct drm_driver is deprecated. Document
the new requirements.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20211108102846.309-4-tzimmermann@suse.de
---
 include/drm/drm_drv.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index 0cd95953cdf5..a84eb4028e5b 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -345,11 +345,14 @@ struct drm_driver {
 	 * mmap hook for GEM drivers, used to implement dma-buf mmap in the
 	 * PRIME helpers.
 	 *
-	 * FIXME: There's way too much duplication going on here, and also moved
-	 * to &drm_gem_object_funcs.
+	 * This hook only exists for historical reasons. Drivers must use
+	 * drm_gem_prime_mmap() to implement it.
+	 *
+	 * FIXME: Convert all drivers to implement mmap in struct
+	 * &drm_gem_object_funcs and inline drm_gem_prime_mmap() into
+	 * its callers. This hook should be removed afterwards.
 	 */
-	int (*gem_prime_mmap)(struct drm_gem_object *obj,
-				struct vm_area_struct *vma);
+	int (*gem_prime_mmap)(struct drm_gem_object *obj, struct vm_area_struct *vma);
 
 	/**
 	 * @dumb_create:
-- 
cgit v1.2.3


From 452290f354f04fca53cba810bd241f4307502f1e Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 10 Nov 2021 11:36:54 +0100
Subject: drm/format-helper: Export drm_fb_clip_offset()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Provide a function that computes the offset into a blit destination
buffer. This will allow to move destination-buffer clipping into the
format-helper callers.

v4:
	* add missing '@' for parameter documentation
	* fix typo 'frambuffer'
v2:
	* provide documentation (Sam)
	* return 'unsigned int' (Sam, Noralf)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Noralf Trønnes <noralf@tronnes.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20211110103702.374-2-tzimmermann@suse.de
---
 include/drm/drm_format_helper.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
index e86925cf07b9..f5a8b334b18d 100644
--- a/include/drm/drm_format_helper.h
+++ b/include/drm/drm_format_helper.h
@@ -6,9 +6,13 @@
 #ifndef __LINUX_DRM_FORMAT_HELPER_H
 #define __LINUX_DRM_FORMAT_HELPER_H
 
+struct drm_format_info;
 struct drm_framebuffer;
 struct drm_rect;
 
+unsigned int drm_fb_clip_offset(unsigned int pitch, const struct drm_format_info *format,
+				const struct drm_rect *clip);
+
 void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
 		   struct drm_rect *clip);
 void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr,
-- 
cgit v1.2.3


From 27bd66dd6419c45e320f34ed419cd80833de1161 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 10 Nov 2021 11:36:55 +0100
Subject: drm/format-helper: Rework format-helper memcpy functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move destination-buffer clipping from all format-helper memcpy
function into callers. Support destination-buffer pitch. Only
distinguish between system and I/O memory, but use same logic
everywhere.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Tested-by: Noralf Trønnes <noralf@tronnes.org>
Reviewed-by: Noralf Trønnes <noralf@tronnes.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20211110103702.374-3-tzimmermann@suse.de
---
 include/drm/drm_format_helper.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
index f5a8b334b18d..1fc3ba7b6060 100644
--- a/include/drm/drm_format_helper.h
+++ b/include/drm/drm_format_helper.h
@@ -13,11 +13,10 @@ struct drm_rect;
 unsigned int drm_fb_clip_offset(unsigned int pitch, const struct drm_format_info *format,
 				const struct drm_rect *clip);
 
-void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
-		   struct drm_rect *clip);
-void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr,
-			   struct drm_framebuffer *fb,
-			   struct drm_rect *clip);
+void drm_fb_memcpy(void *dst, unsigned int dst_pitch, const void *vaddr,
+		   const struct drm_framebuffer *fb, const struct drm_rect *clip);
+void drm_fb_memcpy_toio(void __iomem *dst, unsigned int dst_pitch, const void *vaddr,
+			const struct drm_framebuffer *fb, const struct drm_rect *clip);
 void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,
 		 struct drm_rect *clip, bool cached);
 void drm_fb_xrgb8888_to_rgb332(void *dst, void *vaddr, struct drm_framebuffer *fb,
-- 
cgit v1.2.3


From 3e3543c8a19cade02cefde83ee97233d5d90e7e5 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 10 Nov 2021 11:36:56 +0100
Subject: drm/format-helper: Add destination-buffer pitch to drm_fb_swab()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add destination-buffer pitch as argument to drm_fb_swab(). Done for
consistency with the rest of the interface.

v2:
	* update documentation (Noralf)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Tested-by: Noralf Trønnes <noralf@tronnes.org>
Reviewed-by: Noralf Trønnes <noralf@tronnes.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20211110103702.374-4-tzimmermann@suse.de
---
 include/drm/drm_format_helper.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
index 1fc3ba7b6060..ddcba5abe306 100644
--- a/include/drm/drm_format_helper.h
+++ b/include/drm/drm_format_helper.h
@@ -17,8 +17,9 @@ void drm_fb_memcpy(void *dst, unsigned int dst_pitch, const void *vaddr,
 		   const struct drm_framebuffer *fb, const struct drm_rect *clip);
 void drm_fb_memcpy_toio(void __iomem *dst, unsigned int dst_pitch, const void *vaddr,
 			const struct drm_framebuffer *fb, const struct drm_rect *clip);
-void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,
-		 struct drm_rect *clip, bool cached);
+void drm_fb_swab(void *dst, unsigned int dst_pitch, const void *src,
+		 const struct drm_framebuffer *fb, const struct drm_rect *clip,
+		 bool cached);
 void drm_fb_xrgb8888_to_rgb332(void *dst, void *vaddr, struct drm_framebuffer *fb,
 			       struct drm_rect *clip);
 void drm_fb_xrgb8888_to_rgb565(void *dst, void *vaddr,
-- 
cgit v1.2.3


From 53bc2098d2b6ccff25fe13f9345cbb5c0ef34a99 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 10 Nov 2021 11:36:57 +0100
Subject: drm/format-helper: Rework format-helper conversion functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move destination-buffer clipping from all format-helper conversion
functions into callers. Support destination-buffer pitch. Only
distinguish between system and I/O memory, but use same logic
everywhere.

Simply harmonize the interface and semantics of the existing code.
Not all conversion helpers support all combinations of parameters.
We have to add additional features when we need them.

v2:
	* fix default destination pitch in drm_fb_xrgb8888_to_gray8()
	  (Noralf)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Tested-by: Noralf Trønnes <noralf@tronnes.org>
Reviewed-by: Noralf Trønnes <noralf@tronnes.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20211110103702.374-5-tzimmermann@suse.de
---
 include/drm/drm_format_helper.h | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
index ddcba5abe306..6b366f422a24 100644
--- a/include/drm/drm_format_helper.h
+++ b/include/drm/drm_format_helper.h
@@ -20,21 +20,21 @@ void drm_fb_memcpy_toio(void __iomem *dst, unsigned int dst_pitch, const void *v
 void drm_fb_swab(void *dst, unsigned int dst_pitch, const void *src,
 		 const struct drm_framebuffer *fb, const struct drm_rect *clip,
 		 bool cached);
-void drm_fb_xrgb8888_to_rgb332(void *dst, void *vaddr, struct drm_framebuffer *fb,
-			       struct drm_rect *clip);
-void drm_fb_xrgb8888_to_rgb565(void *dst, void *vaddr,
-			       struct drm_framebuffer *fb,
-			       struct drm_rect *clip, bool swab);
-void drm_fb_xrgb8888_to_rgb565_dstclip(void __iomem *dst, unsigned int dst_pitch,
-				       void *vaddr, struct drm_framebuffer *fb,
-				       struct drm_rect *clip, bool swab);
-void drm_fb_xrgb8888_to_rgb888(void *dst, void *src, struct drm_framebuffer *fb,
-			       struct drm_rect *clip);
-void drm_fb_xrgb8888_to_rgb888_dstclip(void __iomem *dst, unsigned int dst_pitch,
-				       void *vaddr, struct drm_framebuffer *fb,
-				       struct drm_rect *clip);
-void drm_fb_xrgb8888_to_gray8(u8 *dst, void *vaddr, struct drm_framebuffer *fb,
-			      struct drm_rect *clip);
+void drm_fb_xrgb8888_to_rgb332(void *dst, unsigned int dst_pitch, const void *vaddr,
+			       const struct drm_framebuffer *fb, const struct drm_rect *clip);
+void drm_fb_xrgb8888_to_rgb565(void *dst, unsigned int dst_pitch, const void *vaddr,
+			       const struct drm_framebuffer *fb, const struct drm_rect *clip,
+			       bool swab);
+void drm_fb_xrgb8888_to_rgb565_toio(void __iomem *dst, unsigned int dst_pitch,
+				    const void *vaddr, const struct drm_framebuffer *fb,
+				    const struct drm_rect *clip, bool swab);
+void drm_fb_xrgb8888_to_rgb888(void *dst, unsigned int dst_pitch, const void *src,
+			       const struct drm_framebuffer *fb, const struct drm_rect *clip);
+void drm_fb_xrgb8888_to_rgb888_toio(void __iomem *dst, unsigned int dst_pitch,
+				    const void *vaddr, const struct drm_framebuffer *fb,
+				    const struct drm_rect *clip);
+void drm_fb_xrgb8888_to_gray8(void *dst, unsigned int dst_pitch, const void *vaddr,
+			      const struct drm_framebuffer *fb, const struct drm_rect *clip);
 
 int drm_fb_blit_rect_dstclip(void __iomem *dst, unsigned int dst_pitch,
 			     uint32_t dst_format, void *vmap,
-- 
cgit v1.2.3


From 19b20a8021315fce38ae95607e5de389913471a7 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 10 Nov 2021 11:36:58 +0100
Subject: drm/format-helper: Streamline blit-helper interface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move destination-buffer clipping from format-helper blit function into
caller. Rename drm_fb_blit_rect_dstclip() to drm_fb_blit_toio(). Done for
consistency with the rest of the interface. Remove drm_fb_blit_dstclip(),
which isn't required.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Noralf Trønnes <noralf@tronnes.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20211110103702.374-6-tzimmermann@suse.de
---
 include/drm/drm_format_helper.h | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
index 6b366f422a24..97e4c3223af3 100644
--- a/include/drm/drm_format_helper.h
+++ b/include/drm/drm_format_helper.h
@@ -36,12 +36,8 @@ void drm_fb_xrgb8888_to_rgb888_toio(void __iomem *dst, unsigned int dst_pitch,
 void drm_fb_xrgb8888_to_gray8(void *dst, unsigned int dst_pitch, const void *vaddr,
 			      const struct drm_framebuffer *fb, const struct drm_rect *clip);
 
-int drm_fb_blit_rect_dstclip(void __iomem *dst, unsigned int dst_pitch,
-			     uint32_t dst_format, void *vmap,
-			     struct drm_framebuffer *fb,
-			     struct drm_rect *rect);
-int drm_fb_blit_dstclip(void __iomem *dst, unsigned int dst_pitch,
-			uint32_t dst_format, void *vmap,
-			struct drm_framebuffer *fb);
+int drm_fb_blit_toio(void __iomem *dst, unsigned int dst_pitch, uint32_t dst_format,
+		     const void *vmap, const struct drm_framebuffer *fb,
+		     const struct drm_rect *rect);
 
 #endif /* __LINUX_DRM_FORMAT_HELPER_H */
-- 
cgit v1.2.3


From 9239f3e1807c282e3c6bced510640910e9b25c60 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 10 Nov 2021 11:37:01 +0100
Subject: drm/simpledrm: Support virtual screen sizes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add constants for the maximum size of the shadow-plane surface
size. Useful for shadow planes with virtual screen sizes. The
current sizes are 4096 scanlines with 4096 pixels each. This
seems reasonable for current hardware, but can be increased as
necessary.

In simpledrm, set the maximum framebuffer size from the constants
for shadow planes. Implements support for virtual screen sizes and
page flipping on the fbdev console.

v3:
	* use decimal numbers for shadow-plane constants (Noralf)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Noralf Trønnes <noralf@tronnes.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20211110103702.374-9-tzimmermann@suse.de
---
 include/drm/drm_gem_atomic_helper.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_gem_atomic_helper.h b/include/drm/drm_gem_atomic_helper.h
index 48222a107873..0b1e2dd2ac3f 100644
--- a/include/drm/drm_gem_atomic_helper.h
+++ b/include/drm/drm_gem_atomic_helper.h
@@ -22,6 +22,24 @@ int drm_gem_simple_display_pipe_prepare_fb(struct drm_simple_display_pipe *pipe,
  * Helpers for planes with shadow buffers
  */
 
+/**
+ * DRM_SHADOW_PLANE_MAX_WIDTH - Maximum width of a plane's shadow buffer in pixels
+ *
+ * For drivers with shadow planes, the maximum width of the framebuffer is
+ * usually independent from hardware limitations. Drivers can initialize struct
+ * drm_mode_config.max_width from DRM_SHADOW_PLANE_MAX_WIDTH.
+ */
+#define DRM_SHADOW_PLANE_MAX_WIDTH	(4096u)
+
+/**
+ * DRM_SHADOW_PLANE_MAX_HEIGHT - Maximum height of a plane's shadow buffer in scanlines
+ *
+ * For drivers with shadow planes, the maximum height of the framebuffer is
+ * usually independent from hardware limitations. Drivers can initialize struct
+ * drm_mode_config.max_height from DRM_SHADOW_PLANE_MAX_HEIGHT.
+ */
+#define DRM_SHADOW_PLANE_MAX_HEIGHT	(4096u)
+
 /**
  * struct drm_shadow_plane_state - plane state for planes with shadow buffers
  *
-- 
cgit v1.2.3


From 84e9dfd5185285df55ae9068c89cde1a88baa7b7 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 10 Nov 2021 11:37:02 +0100
Subject: drm: Clarify semantics of struct drm_mode_config.{min, max}_{width,
 height}
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add additional information on the semantics of the size fields in
struct drm_mode_config. Also add a TODO to review all driver for
correct usage of these fields.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Noralf Trønnes <noralf@tronnes.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20211110103702.374-10-tzimmermann@suse.de
---
 include/drm/drm_mode_config.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h
index 48b7de80daf5..91ca575a78de 100644
--- a/include/drm/drm_mode_config.h
+++ b/include/drm/drm_mode_config.h
@@ -359,6 +359,19 @@ struct drm_mode_config_funcs {
  * Core mode resource tracking structure.  All CRTC, encoders, and connectors
  * enumerated by the driver are added here, as are global properties.  Some
  * global restrictions are also here, e.g. dimension restrictions.
+ *
+ * Framebuffer sizes refer to the virtual screen that can be displayed by
+ * the CRTC. This can be different from the physical resolution programmed.
+ * The minimum width and height, stored in @min_width and @min_height,
+ * describe the smallest size of the framebuffer. It correlates to the
+ * minimum programmable resolution.
+ * The maximum width, stored in @max_width, is typically limited by the
+ * maximum pitch between two adjacent scanlines. The maximum height, stored
+ * in @max_height, is usually only limited by the amount of addressable video
+ * memory. For hardware that has no real maximum, drivers should pick a
+ * reasonable default.
+ *
+ * See also @DRM_SHADOW_PLANE_MAX_WIDTH and @DRM_SHADOW_PLANE_MAX_HEIGHT.
  */
 struct drm_mode_config {
 	/**
-- 
cgit v1.2.3


From bf9167a8b40c9cf463521da05342db81808c1b6e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 11 Nov 2021 09:56:33 +0100
Subject: HID: intel-ish-hid: fix module device-id handling

A late addititon to the intel-ish-hid framework caused a build failure
with clang, and introduced an ABI to the module loader that stops working
if any driver ever needs to bind to more than one UUID:

drivers/hid/intel-ish-hid/ishtp-fw-loader.c:1067:4: error: initializer element is not a compile-time constant

Change the ishtp_device_id to have correct documentation and a driver_data
field like all the other ones, and change the drivers to use the ID table
as the primary identification in a way that works with all compilers
and avoids duplciating the identifiers.

Fixes: f155dfeaa4ee ("platform/x86: isthp_eclite: only load for matching devices")
Fixes: facfe0a4fdce ("platform/chrome: chros_ec_ishtp: only load for matching devices")
Fixes: 0d0cccc0fd83 ("HID: intel-ish-hid: hid-client: only load for matching devices")
Fixes: 44e2a58cb880 ("HID: intel-ish-hid: fw-loader: only load for matching devices")
Fixes: cb1a2c6847f7 ("HID: intel-ish-hid: use constants for modaliases")
Fixes: fa443bc3c1e4 ("HID: intel-ish-hid: add support for MODULE_DEVICE_TABLE()")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
[jkosina@suse.cz: fix ecl_ishtp_cl_driver.id initialization]
[jkosina@suse.cz: fix conflict with already fixed kerneldoc]
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/intel-ish-client-if.h | 4 ++--
 include/linux/mod_devicetable.h     | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h
index aee8ff4739b1..f45f13304add 100644
--- a/include/linux/intel-ish-client-if.h
+++ b/include/linux/intel-ish-client-if.h
@@ -9,7 +9,7 @@
 #define _INTEL_ISH_CLIENT_IF_H_
 
 #include <linux/device.h>
-#include <linux/uuid.h>
+#include <linux/mod_devicetable.h>
 
 struct ishtp_cl_device;
 struct ishtp_device;
@@ -40,7 +40,7 @@ enum cl_state {
 struct ishtp_cl_driver {
 	struct device_driver driver;
 	const char *name;
-	const guid_t *guid;
+	const struct ishtp_device_id *id;
 	int (*probe)(struct ishtp_cl_device *dev);
 	void (*remove)(struct ishtp_cl_device *dev);
 	int (*reset)(struct ishtp_cl_device *dev);
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index c70abe7aaef2..4bb71979a8fd 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -902,9 +902,11 @@ struct dfl_device_id {
 /**
  * struct ishtp_device_id - ISHTP device identifier
  * @guid: GUID of the device.
+ * @driver_data: pointer to driver specific data
  */
 struct ishtp_device_id {
 	guid_t guid;
+	kernel_ulong_t driver_data;
 };
 
 #endif /* LINUX_MOD_DEVICETABLE_H */
-- 
cgit v1.2.3


From 8c42d2fa4eeab6c37a0b1b1aa7a2715248ef4f34 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 11 Nov 2021 17:26:09 -0800
Subject: bpf: Support BTF_KIND_TYPE_TAG for btf_type_tag attributes

LLVM patches ([1] for clang, [2] and [3] for BPF backend)
added support for btf_type_tag attributes. This patch
added support for the kernel.

The main motivation for btf_type_tag is to bring kernel
annotations __user, __rcu etc. to btf. With such information
available in btf, bpf verifier can detect mis-usages
and reject the program. For example, for __user tagged pointer,
developers can then use proper helper like bpf_probe_read_user()
etc. to read the data.

BTF_KIND_TYPE_TAG may also useful for other tracing
facility where instead of to require user to specify
kernel/user address type, the kernel can detect it
by itself with btf.

  [1] https://reviews.llvm.org/D111199
  [2] https://reviews.llvm.org/D113222
  [3] https://reviews.llvm.org/D113496

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211112012609.1505032-1-yhs@fb.com
---
 include/uapi/linux/btf.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index deb12f755f0f..b0d8fea1951d 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -43,7 +43,7 @@ struct btf_type {
 	 * "size" tells the size of the type it is describing.
 	 *
 	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
-	 * FUNC, FUNC_PROTO, VAR and DECL_TAG.
+	 * FUNC, FUNC_PROTO, VAR, DECL_TAG and TYPE_TAG.
 	 * "type" is a type_id referring to another type.
 	 */
 	union {
@@ -75,6 +75,7 @@ enum {
 	BTF_KIND_DATASEC	= 15,	/* Section	*/
 	BTF_KIND_FLOAT		= 16,	/* Floating point	*/
 	BTF_KIND_DECL_TAG	= 17,	/* Decl Tag */
+	BTF_KIND_TYPE_TAG	= 18,	/* Type Tag */
 
 	NR_BTF_KINDS,
 	BTF_KIND_MAX		= NR_BTF_KINDS - 1,
-- 
cgit v1.2.3


From 9e2ad638ae3632ef916ceb39f70e3104bf8fdc97 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 12 Nov 2021 07:02:42 -0800
Subject: bpf: Extend BTF_ID_LIST_GLOBAL with parameter for number of IDs

syzbot reported the following BUG w/o CONFIG_DEBUG_INFO_BTF

BUG: KASAN: global-out-of-bounds in task_iter_init+0x212/0x2e7 kernel/bpf/task_iter.c:661
Read of size 4 at addr ffffffff90297404 by task swapper/0/1

CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.15.0-syzkaller #0
Hardware name: ... Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
print_address_description.constprop.0.cold+0xf/0x309 mm/kasan/report.c:256
__kasan_report mm/kasan/report.c:442 [inline]
kasan_report.cold+0x83/0xdf mm/kasan/report.c:459
task_iter_init+0x212/0x2e7 kernel/bpf/task_iter.c:661
do_one_initcall+0x103/0x650 init/main.c:1295
do_initcall_level init/main.c:1368 [inline]
do_initcalls init/main.c:1384 [inline]
do_basic_setup init/main.c:1403 [inline]
kernel_init_freeable+0x6b1/0x73a init/main.c:1606
kernel_init+0x1a/0x1d0 init/main.c:1497
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295
</TASK>

This is caused by hard-coded name[1] in BTF_ID_LIST_GLOBAL (w/o
CONFIG_DEBUG_INFO_BTF). Fix this by adding a parameter n to
BTF_ID_LIST_GLOBAL. This avoids ifdef CONFIG_DEBUG_INFO_BTF in btf.c and
filter.c.

Fixes: 7c7e3d31e785 ("bpf: Introduce helper bpf_find_vma")
Reported-by: syzbot+e0d81ec552a21d9071aa@syzkaller.appspotmail.com
Reported-by: Eric Dumazet <edumazet@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20211112150243.1270987-2-songliubraving@fb.com
---
 include/linux/btf_ids.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 47d9abfbdb55..6bb42b785293 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -73,7 +73,7 @@ asm(							\
 __BTF_ID_LIST(name, local)				\
 extern u32 name[];
 
-#define BTF_ID_LIST_GLOBAL(name)			\
+#define BTF_ID_LIST_GLOBAL(name, n)			\
 __BTF_ID_LIST(name, globl)
 
 /* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with
@@ -83,7 +83,7 @@ __BTF_ID_LIST(name, globl)
 	BTF_ID_LIST(name) \
 	BTF_ID(prefix, typename)
 #define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) \
-	BTF_ID_LIST_GLOBAL(name) \
+	BTF_ID_LIST_GLOBAL(name, 1)			  \
 	BTF_ID(prefix, typename)
 
 /*
@@ -149,7 +149,7 @@ extern struct btf_id_set name;
 #define BTF_ID_LIST(name) static u32 name[5];
 #define BTF_ID(prefix, name)
 #define BTF_ID_UNUSED
-#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
+#define BTF_ID_LIST_GLOBAL(name, n) u32 name[n];
 #define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
 #define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1];
 #define BTF_SET_START(name) static struct btf_id_set name = { 0 };
-- 
cgit v1.2.3


From d19ddb476a539fd78ad1028ae13bb38506286931 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 12 Nov 2021 07:02:43 -0800
Subject: bpf: Introduce btf_tracing_ids

Similar to btf_sock_ids, btf_tracing_ids provides btf ID for task_struct,
file, and vm_area_struct via easy to understand format like
btf_tracing_ids[BTF_TRACING_TYPE_[TASK|file|VMA]].

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20211112150243.1270987-3-songliubraving@fb.com
---
 include/linux/btf_ids.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 6bb42b785293..919c0fde1c51 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -189,6 +189,18 @@ MAX_BTF_SOCK_TYPE,
 extern u32 btf_sock_ids[];
 #endif
 
-extern u32 btf_task_struct_ids[];
+#define BTF_TRACING_TYPE_xxx	\
+	BTF_TRACING_TYPE(BTF_TRACING_TYPE_TASK, task_struct)	\
+	BTF_TRACING_TYPE(BTF_TRACING_TYPE_FILE, file)		\
+	BTF_TRACING_TYPE(BTF_TRACING_TYPE_VMA, vm_area_struct)
+
+enum {
+#define BTF_TRACING_TYPE(name, type) name,
+BTF_TRACING_TYPE_xxx
+#undef BTF_TRACING_TYPE
+MAX_BTF_TRACING_TYPE,
+};
+
+extern u32 btf_tracing_ids[];
 
 #endif
-- 
cgit v1.2.3


From 1aa3b2207e889a948049c9a8016cedb0218c2389 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Fri, 12 Nov 2021 18:18:10 -0500
Subject: net,lsm,selinux: revert the security_sctp_assoc_established() hook

This patch reverts two prior patches, e7310c94024c
("security: implement sctp_assoc_established hook in selinux") and
7c2ef0240e6a ("security: add sctp_assoc_established hook"), which
create the security_sctp_assoc_established() LSM hook and provide a
SELinux implementation.  Unfortunately these two patches were merged
without proper review (the Reviewed-by and Tested-by tags from
Richard Haines were for previous revisions of these patches that
were significantly different) and there are outstanding objections
from the SELinux maintainers regarding these patches.

Work is currently ongoing to correct the problems identified in the
reverted patches, as well as others that have come up during review,
but it is unclear at this point in time when that work will be ready
for inclusion in the mainline kernel.  In the interest of not keeping
objectionable code in the kernel for multiple weeks, and potentially
a kernel release, we are reverting the two problematic patches.

Signed-off-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/lsm_hook_defs.h | 2 --
 include/linux/lsm_hooks.h     | 5 -----
 include/linux/security.h      | 7 -------
 3 files changed, 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 442a611fa0fb..df8de62f4710 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -335,8 +335,6 @@ LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname,
 	 struct sockaddr *address, int addrlen)
 LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc,
 	 struct sock *sk, struct sock *newsk)
-LSM_HOOK(void, LSM_RET_VOID, sctp_assoc_established, struct sctp_association *asoc,
-	 struct sk_buff *skb)
 #endif /* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index d6823214d5c1..d45b6f6e27fd 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1050,11 +1050,6 @@
  *	@asoc pointer to current sctp association structure.
  *	@sk pointer to current sock structure.
  *	@newsk pointer to new sock structure.
- * @sctp_assoc_established:
- *	Passes the @asoc and @chunk->skb of the association COOKIE_ACK packet
- *	to the security module.
- *	@asoc pointer to sctp association structure.
- *	@skb pointer to skbuff of association packet.
  *
  * Security hooks for Infiniband
  *
diff --git a/include/linux/security.h b/include/linux/security.h
index 06eac4e61a13..bbf44a466832 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1430,8 +1430,6 @@ int security_sctp_bind_connect(struct sock *sk, int optname,
 			       struct sockaddr *address, int addrlen);
 void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
 			    struct sock *newsk);
-void security_sctp_assoc_established(struct sctp_association *asoc,
-				     struct sk_buff *skb);
 
 #else	/* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct sock *sock,
@@ -1651,11 +1649,6 @@ static inline void security_sctp_sk_clone(struct sctp_association *asoc,
 					  struct sock *newsk)
 {
 }
-
-static inline void security_sctp_assoc_established(struct sctp_association *asoc,
-						   struct sk_buff *skb)
-{
-}
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
-- 
cgit v1.2.3


From 938aa33f14657c9ed9deea348b7d6f14b6d69cb7 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 14 Nov 2021 13:28:34 -0500
Subject: tracing: Add length protection to histogram string copies

The string copies to the histogram storage has a max size of 256 bytes
(defined by MAX_FILTER_STR_VAL). Only the string size of the event field
needs to be copied to the event storage, but no more than what is in the
event storage. Although nothing should be bigger than 256 bytes, there's
no protection against overwriting of the storage if one day there is.

Copy no more than the destination size, and enforce it.

Also had to turn MAX_FILTER_STR_VAL into an unsigned int, to keep the
min() comparison of the string sizes of comparable types.

Link: https://lore.kernel.org/all/CAHk-=wjREUihCGrtRBwfX47y_KrLCGjiq3t6QtoNJpmVrAEb1w@mail.gmail.com/
Link: https://lkml.kernel.org/r/20211114132834.183429a4@rorschach.local.home

Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Tom Zanussi <zanussi@kernel.org>
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Fixes: 63f84ae6b82b ("tracing/histogram: Do not copy the fixed-size char array field over the field size")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 50453b287615..2d167ac3452c 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -673,7 +673,7 @@ struct trace_event_file {
 
 #define PERF_MAX_TRACE_SIZE	8192
 
-#define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
+#define MAX_FILTER_STR_VAL	256U	/* Should handle KSYM_SYMBOL_LEN */
 
 enum event_trigger_type {
 	ETT_NONE		= (0),
-- 
cgit v1.2.3


From 0a08088f82c2db9cd6bce3258c79a76980c77651 Mon Sep 17 00:00:00 2001
From: Dillon Min <dillon.minfei@gmail.com>
Date: Tue, 19 Oct 2021 09:43:18 +0100
Subject: media: v4l2-mem2mem: add v4l2_m2m_get_unmapped_area for no-mmu
 platform

For platforms without MMU the m2m provides a helper method
v4l2_m2m_get_unmapped_area(), The mmap() routines will call
this to get a proposed address for the mapping.

More detailed information about get_unmapped_area can be found in
Documentation/nommu-mmap.txt

Signed-off-by: Dillon Min <dillon.minfei@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/media/v4l2-mem2mem.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
index 5a91b548ecc0..fdbd5257e020 100644
--- a/include/media/v4l2-mem2mem.h
+++ b/include/media/v4l2-mem2mem.h
@@ -495,6 +495,11 @@ __poll_t v4l2_m2m_poll(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
 int v4l2_m2m_mmap(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
 		  struct vm_area_struct *vma);
 
+#ifndef CONFIG_MMU
+unsigned long v4l2_m2m_get_unmapped_area(struct file *file, unsigned long addr,
+					 unsigned long len, unsigned long pgoff,
+					 unsigned long flags);
+#endif
 /**
  * v4l2_m2m_init() - initialize per-driver m2m data
  *
-- 
cgit v1.2.3


From ef9f18a9e3a04126cf017216193166abe03dedef Mon Sep 17 00:00:00 2001
From: Dillon Min <dillon.minfei@gmail.com>
Date: Tue, 19 Oct 2021 09:43:21 +0100
Subject: media: v4l2-ctrls: Add RGB color effects control

Add V4L2_COLORFX_SET_RGB color effects control, V4L2_CID_COLORFX_RGB
for RGB color setting.

with two mirror changes:
- change 0xFFFFFF to 0xffffff
- fix comments 2^24 to 2^24 - 1

[hverkuil: dropped spaces around + with V4L2_CID_BASE for consistency]

Signed-off-by: Dillon Min <dillon.minfei@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/v4l2-controls.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 5fea5feb0412..c9eea93a43a9 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -128,6 +128,7 @@ enum v4l2_colorfx {
 	V4L2_COLORFX_SOLARIZATION		= 13,
 	V4L2_COLORFX_ANTIQUE			= 14,
 	V4L2_COLORFX_SET_CBCR			= 15,
+	V4L2_COLORFX_SET_RGB			= 16,
 };
 #define V4L2_CID_AUTOBRIGHTNESS			(V4L2_CID_BASE+32)
 #define V4L2_CID_BAND_STOP_FILTER		(V4L2_CID_BASE+33)
@@ -145,9 +146,10 @@ enum v4l2_colorfx {
 
 #define V4L2_CID_ALPHA_COMPONENT		(V4L2_CID_BASE+41)
 #define V4L2_CID_COLORFX_CBCR			(V4L2_CID_BASE+42)
+#define V4L2_CID_COLORFX_RGB			(V4L2_CID_BASE+43)
 
 /* last CID + 1 */
-#define V4L2_CID_LASTP1                         (V4L2_CID_BASE+43)
+#define V4L2_CID_LASTP1                         (V4L2_CID_BASE+44)
 
 /* USER-class private control IDs */
 
-- 
cgit v1.2.3


From 4c7924fb905b02323ff6d9d20f370892615dccfa Mon Sep 17 00:00:00 2001
From: Julien Massot <julien.massot@iot.bzh>
Date: Fri, 22 Oct 2021 14:21:01 +0200
Subject: soc: renesas: rcar-rst: Add support to set rproc boot address

R-Car Gen3 SoC series has a realtime processor, the boot
address of this processor can be set thanks to CR7BAR register
of the reset module.

Export this function so that it's possible to set the boot
address from a remoteproc driver.

Also drop the __initdata qualifier on rcar_rst_base,
since we will use this address later than init time.

Signed-off-by: Julien Massot <julien.massot@iot.bzh>
Link: https://lore.kernel.org/r/20211022122101.66998-1-julien.massot@iot.bzh
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 include/linux/soc/renesas/rcar-rst.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/soc/renesas/rcar-rst.h b/include/linux/soc/renesas/rcar-rst.h
index 7899a5b8c247..1f1fe8bfaa76 100644
--- a/include/linux/soc/renesas/rcar-rst.h
+++ b/include/linux/soc/renesas/rcar-rst.h
@@ -4,8 +4,10 @@
 
 #ifdef CONFIG_RST_RCAR
 int rcar_rst_read_mode_pins(u32 *mode);
+int rcar_rst_set_rproc_boot_addr(u64 boot_addr);
 #else
 static inline int rcar_rst_read_mode_pins(u32 *mode) { return -ENODEV; }
+static inline int rcar_rst_set_rproc_boot_addr(u64 boot_addr) { return -ENODEV; }
 #endif
 
 #endif /* __LINUX_SOC_RENESAS_RCAR_RST_H__ */
-- 
cgit v1.2.3


From 032816fbbfafe3198bb5c71fbbe4e8e5be33b352 Mon Sep 17 00:00:00 2001
From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Date: Wed, 27 Oct 2021 14:45:07 +0100
Subject: pinctrl: pinconf-generic: Add support for "output-impedance-ohms" to
 be extracted from DT files

Add "output-impedance-ohms" property to generic options used for DT
parsing files. This enables drivers, which use generic pin configurations,
to get the value passed to this property.

Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Reviewed-by: Biju Das <biju.das.jz@bp.renesas.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20211027134509.5036-3-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 include/linux/pinctrl/pinconf-generic.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index eee0e3948537..2422211d6a5a 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -91,6 +91,8 @@ struct pinctrl_map;
  * 	configuration (eg. the currently selected mux function) drive values on
  * 	the line. Use argument 1 to enable output mode, argument 0 to disable
  * 	it.
+ * @PIN_CONFIG_OUTPUT_IMPEDANCE_OHMS: this will configure the output impedance
+ * 	of the pin with the value passed as argument. The argument is in ohms.
  * @PIN_CONFIG_PERSIST_STATE: retain pin state across sleep or controller reset
  * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power
  *	supplies, the argument to this parameter (on a custom format) tells
@@ -129,6 +131,7 @@ enum pin_config_param {
 	PIN_CONFIG_MODE_PWM,
 	PIN_CONFIG_OUTPUT,
 	PIN_CONFIG_OUTPUT_ENABLE,
+	PIN_CONFIG_OUTPUT_IMPEDANCE_OHMS,
 	PIN_CONFIG_PERSIST_STATE,
 	PIN_CONFIG_POWER_SOURCE,
 	PIN_CONFIG_SKEW_DELAY,
-- 
cgit v1.2.3


From 5a363c20673308e968b6640deb73d7bf77e8b463 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 8 Nov 2021 10:31:47 +0100
Subject: drm/shmem-helper: Unexport drm_gem_shmem_create_with_handle()

Turn drm_gem_shmem_create_with_handle() into an internal helper
function. It's not used outside of the compilation unit.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20211108093149.7226-2-tzimmermann@suse.de
---
 include/drm/drm_gem_shmem_helper.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index 434328d8a0d9..6b47eb7d9f76 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -128,11 +128,6 @@ static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem
 void drm_gem_shmem_purge_locked(struct drm_gem_object *obj);
 bool drm_gem_shmem_purge(struct drm_gem_object *obj);
 
-struct drm_gem_shmem_object *
-drm_gem_shmem_create_with_handle(struct drm_file *file_priv,
-				 struct drm_device *dev, size_t size,
-				 uint32_t *handle);
-
 int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev,
 			      struct drm_mode_create_dumb *args);
 
-- 
cgit v1.2.3


From c7fbcb7149ff9321bbbcc93c9920de534ea8102c Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 8 Nov 2021 10:31:48 +0100
Subject: drm/shmem-helper: Export dedicated wrappers for GEM object functions

Wrap GEM SHMEM functions for struct drm_gem_object_funcs and update
all callers. This will allow for an update of the public interfaces
of the GEM SHMEM helper library.

v2:
	* fix docs for drm_gem_shmem_object_print_info()

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20211108093149.7226-3-tzimmermann@suse.de
---
 include/drm/drm_gem_shmem_helper.h | 120 +++++++++++++++++++++++++++++++++++++
 1 file changed, 120 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index 6b47eb7d9f76..4199877ae588 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -137,6 +137,126 @@ void drm_gem_shmem_print_info(struct drm_printer *p, unsigned int indent,
 			      const struct drm_gem_object *obj);
 
 struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_object *obj);
+
+/*
+ * GEM object functions
+ */
+
+/**
+ * drm_gem_shmem_object_free - GEM object function for drm_gem_shmem_free_object()
+ * @obj: GEM object to free
+ *
+ * This function wraps drm_gem_shmem_free_object(). Drivers that employ the shmem helpers
+ * should use it as their &drm_gem_object_funcs.free handler.
+ */
+static inline void drm_gem_shmem_object_free(struct drm_gem_object *obj)
+{
+	drm_gem_shmem_free_object(obj);
+}
+
+/**
+ * drm_gem_shmem_object_print_info() - Print &drm_gem_shmem_object info for debugfs
+ * @p: DRM printer
+ * @indent: Tab indentation level
+ * @obj: GEM object
+ *
+ * This function wraps drm_gem_shmem_print_info(). Drivers that employ the shmem helpers should
+ * use this function as their &drm_gem_object_funcs.print_info handler.
+ */
+static inline void drm_gem_shmem_object_print_info(struct drm_printer *p, unsigned int indent,
+						   const struct drm_gem_object *obj)
+{
+	drm_gem_shmem_print_info(p, indent, obj);
+}
+
+/**
+ * drm_gem_shmem_object_pin - GEM object function for drm_gem_shmem_pin()
+ * @obj: GEM object
+ *
+ * This function wraps drm_gem_shmem_pin(). Drivers that employ the shmem helpers should
+ * use it as their &drm_gem_object_funcs.pin handler.
+ */
+static inline int drm_gem_shmem_object_pin(struct drm_gem_object *obj)
+{
+	return drm_gem_shmem_pin(obj);
+}
+
+/**
+ * drm_gem_shmem_object_unpin - GEM object function for drm_gem_shmem_unpin()
+ * @obj: GEM object
+ *
+ * This function wraps drm_gem_shmem_unpin(). Drivers that employ the shmem helpers should
+ * use it as their &drm_gem_object_funcs.unpin handler.
+ */
+static inline void drm_gem_shmem_object_unpin(struct drm_gem_object *obj)
+{
+	drm_gem_shmem_unpin(obj);
+}
+
+/**
+ * drm_gem_shmem_object_get_sg_table - GEM object function for drm_gem_shmem_get_sg_table()
+ * @obj: GEM object
+ *
+ * This function wraps drm_gem_shmem_get_sg_table(). Drivers that employ the shmem helpers should
+ * use it as their &drm_gem_object_funcs.get_sg_table handler.
+ *
+ * Returns:
+ * A pointer to the scatter/gather table of pinned pages or NULL on failure.
+ */
+static inline struct sg_table *drm_gem_shmem_object_get_sg_table(struct drm_gem_object *obj)
+{
+	return drm_gem_shmem_get_sg_table(obj);
+}
+
+/*
+ * drm_gem_shmem_object_vmap - GEM object function for drm_gem_shmem_vmap()
+ * @obj: GEM object
+ * @map: Returns the kernel virtual address of the SHMEM GEM object's backing store.
+ *
+ * This function wraps drm_gem_shmem_vmap(). Drivers that employ the shmem helpers should
+ * use it as their &drm_gem_object_funcs.vmap handler.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
+static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map)
+{
+	return drm_gem_shmem_vmap(obj, map);
+}
+
+/*
+ * drm_gem_shmem_object_vunmap - GEM object function for drm_gem_shmem_vunmap()
+ * @obj: GEM object
+ * @map: Kernel virtual address where the SHMEM GEM object was mapped
+ *
+ * This function wraps drm_gem_shmem_vunmap(). Drivers that employ the shmem helpers should
+ * use it as their &drm_gem_object_funcs.vunmap handler.
+ */
+static inline void drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map)
+{
+	drm_gem_shmem_vunmap(obj, map);
+}
+
+/**
+ * drm_gem_shmem_object_mmap - GEM object function for drm_gem_shmem_mmap()
+ * @obj: GEM object
+ * @vma: VMA for the area to be mapped
+ *
+ * This function wraps drm_gem_shmem_mmap(). Drivers that employ the shmem helpers should
+ * use it as their &drm_gem_object_funcs.mmap handler.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
+static inline int drm_gem_shmem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+	return drm_gem_shmem_mmap(obj, vma);
+}
+
+/*
+ * Driver ops
+ */
+
 struct drm_gem_object *
 drm_gem_shmem_prime_import_sg_table(struct drm_device *dev,
 				    struct dma_buf_attachment *attach,
-- 
cgit v1.2.3


From a193f3b4e050e35c506a34d0870c838d8e0b0449 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 8 Nov 2021 10:31:49 +0100
Subject: drm/shmem-helper: Pass GEM shmem object in public interfaces

Change all GEM SHMEM object functions that receive a GEM object
of type struct drm_gem_object to expect an object of type
struct drm_gem_shmem_object instead.

This change reduces the number of upcasts from struct drm_gem_object
by moving them into callers. The C compiler can now verify that the
GEM SHMEM functions are called with the correct type.

For consistency, the patch also renames drm_gem_shmem_free_object to
drm_gem_shmem_free. It further updates documentation for a number of
functions.

v3:
	* fix docs for drm_gem_shmem_object_free()
v2:
	* mention _object_ callbacks in docs (Daniel)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20211108093149.7226-4-tzimmermann@suse.de
---
 include/drm/drm_gem_shmem_helper.h | 69 ++++++++++++++++++++++----------------
 1 file changed, 41 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index 4199877ae588..311d66c9cf4b 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -107,16 +107,17 @@ struct drm_gem_shmem_object {
 	container_of(obj, struct drm_gem_shmem_object, base)
 
 struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t size);
-void drm_gem_shmem_free_object(struct drm_gem_object *obj);
+void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem);
 
 int drm_gem_shmem_get_pages(struct drm_gem_shmem_object *shmem);
 void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem);
-int drm_gem_shmem_pin(struct drm_gem_object *obj);
-void drm_gem_shmem_unpin(struct drm_gem_object *obj);
-int drm_gem_shmem_vmap(struct drm_gem_object *obj, struct dma_buf_map *map);
-void drm_gem_shmem_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map);
+int drm_gem_shmem_pin(struct drm_gem_shmem_object *shmem);
+void drm_gem_shmem_unpin(struct drm_gem_shmem_object *shmem);
+int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map);
+void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map);
+int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct *vma);
 
-int drm_gem_shmem_madvise(struct drm_gem_object *obj, int madv);
+int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv);
 
 static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem)
 {
@@ -125,33 +126,31 @@ static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem
 		!shmem->base.dma_buf && !shmem->base.import_attach;
 }
 
-void drm_gem_shmem_purge_locked(struct drm_gem_object *obj);
-bool drm_gem_shmem_purge(struct drm_gem_object *obj);
+void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem);
+bool drm_gem_shmem_purge(struct drm_gem_shmem_object *shmem);
 
-int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev,
-			      struct drm_mode_create_dumb *args);
-
-int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
-
-void drm_gem_shmem_print_info(struct drm_printer *p, unsigned int indent,
-			      const struct drm_gem_object *obj);
+struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_shmem_object *shmem);
+struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_shmem_object *shmem);
 
-struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_object *obj);
+void drm_gem_shmem_print_info(const struct drm_gem_shmem_object *shmem,
+			      struct drm_printer *p, unsigned int indent);
 
 /*
  * GEM object functions
  */
 
 /**
- * drm_gem_shmem_object_free - GEM object function for drm_gem_shmem_free_object()
+ * drm_gem_shmem_object_free - GEM object function for drm_gem_shmem_free()
  * @obj: GEM object to free
  *
- * This function wraps drm_gem_shmem_free_object(). Drivers that employ the shmem helpers
+ * This function wraps drm_gem_shmem_free(). Drivers that employ the shmem helpers
  * should use it as their &drm_gem_object_funcs.free handler.
  */
 static inline void drm_gem_shmem_object_free(struct drm_gem_object *obj)
 {
-	drm_gem_shmem_free_object(obj);
+	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+
+	drm_gem_shmem_free(shmem);
 }
 
 /**
@@ -166,7 +165,9 @@ static inline void drm_gem_shmem_object_free(struct drm_gem_object *obj)
 static inline void drm_gem_shmem_object_print_info(struct drm_printer *p, unsigned int indent,
 						   const struct drm_gem_object *obj)
 {
-	drm_gem_shmem_print_info(p, indent, obj);
+	const struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+
+	drm_gem_shmem_print_info(shmem, p, indent);
 }
 
 /**
@@ -178,7 +179,9 @@ static inline void drm_gem_shmem_object_print_info(struct drm_printer *p, unsign
  */
 static inline int drm_gem_shmem_object_pin(struct drm_gem_object *obj)
 {
-	return drm_gem_shmem_pin(obj);
+	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+
+	return drm_gem_shmem_pin(shmem);
 }
 
 /**
@@ -190,7 +193,9 @@ static inline int drm_gem_shmem_object_pin(struct drm_gem_object *obj)
  */
 static inline void drm_gem_shmem_object_unpin(struct drm_gem_object *obj)
 {
-	drm_gem_shmem_unpin(obj);
+	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+
+	drm_gem_shmem_unpin(shmem);
 }
 
 /**
@@ -205,7 +210,9 @@ static inline void drm_gem_shmem_object_unpin(struct drm_gem_object *obj)
  */
 static inline struct sg_table *drm_gem_shmem_object_get_sg_table(struct drm_gem_object *obj)
 {
-	return drm_gem_shmem_get_sg_table(obj);
+	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+
+	return drm_gem_shmem_get_sg_table(shmem);
 }
 
 /*
@@ -221,7 +228,9 @@ static inline struct sg_table *drm_gem_shmem_object_get_sg_table(struct drm_gem_
  */
 static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map)
 {
-	return drm_gem_shmem_vmap(obj, map);
+	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+
+	return drm_gem_shmem_vmap(shmem, map);
 }
 
 /*
@@ -234,7 +243,9 @@ static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, struct d
  */
 static inline void drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map)
 {
-	drm_gem_shmem_vunmap(obj, map);
+	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+
+	drm_gem_shmem_vunmap(shmem, map);
 }
 
 /**
@@ -250,7 +261,9 @@ static inline void drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, struc
  */
 static inline int drm_gem_shmem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
 {
-	return drm_gem_shmem_mmap(obj, vma);
+	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+
+	return drm_gem_shmem_mmap(shmem, vma);
 }
 
 /*
@@ -261,8 +274,8 @@ struct drm_gem_object *
 drm_gem_shmem_prime_import_sg_table(struct drm_device *dev,
 				    struct dma_buf_attachment *attach,
 				    struct sg_table *sgt);
-
-struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_object *obj);
+int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev,
+			      struct drm_mode_create_dumb *args);
 
 /**
  * DRM_GEM_SHMEM_DRIVER_OPS - Default shmem GEM operations
-- 
cgit v1.2.3


From 507805b83ff108473dba9d4909e41abd50cf07f5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 10 Nov 2021 15:47:42 +0200
Subject: gpiolib: acpi: Remove never used devm_acpi_dev_remove_driver_gpios()

Remove never used devm_acpi_dev_remove_driver_gpios().

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 include/linux/gpio/consumer.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 97a28ad3393b..3ad67b4a72be 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -690,7 +690,6 @@ void acpi_dev_remove_driver_gpios(struct acpi_device *adev);
 
 int devm_acpi_dev_add_driver_gpios(struct device *dev,
 				   const struct acpi_gpio_mapping *gpios);
-void devm_acpi_dev_remove_driver_gpios(struct device *dev);
 
 struct gpio_desc *acpi_get_and_request_gpiod(char *path, int pin, char *label);
 
@@ -708,7 +707,6 @@ static inline int devm_acpi_dev_add_driver_gpios(struct device *dev,
 {
 	return -ENXIO;
 }
-static inline void devm_acpi_dev_remove_driver_gpios(struct device *dev) {}
 
 #endif /* CONFIG_GPIOLIB && CONFIG_ACPI */
 
-- 
cgit v1.2.3


From 10a2308ffb8cf262e473eb324fde42ae31b6da04 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Fri, 12 Nov 2021 18:16:34 +0800
Subject: net: Clean up some inconsistent indenting

Eliminate the follow smatch warning:

./include/linux/skbuff.h:4229 skb_remcsum_process() warn: inconsistent
indenting.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 686a666d073d..c8cb7e697d47 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4226,7 +4226,7 @@ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr,
 		return;
 	}
 
-	 if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) {
+	if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) {
 		__skb_checksum_complete(skb);
 		skb_postpull_rcsum(skb, skb->data, ptr - (void *)skb->data);
 	}
-- 
cgit v1.2.3


From 168eed447129899611098219b70ef97b605bc6e1 Mon Sep 17 00:00:00 2001
From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Date: Tue, 2 Nov 2021 12:10:17 +0200
Subject: ASoC: SOF: IPC: Add new IPC command to free trace DMA

Add a new SOF_IPC_TRACE_DMA_FREE IPC command to stop and free trace DMA
in the FW.

Signed-off-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20211102101019.14037-2-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sof/header.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/sound/sof/header.h b/include/sound/sof/header.h
index 4c747c52e01b..b97a76bcb655 100644
--- a/include/sound/sof/header.h
+++ b/include/sound/sof/header.h
@@ -119,6 +119,7 @@
 #define SOF_IPC_TRACE_DMA_POSITION		SOF_CMD_TYPE(0x002)
 #define SOF_IPC_TRACE_DMA_PARAMS_EXT		SOF_CMD_TYPE(0x003)
 #define SOF_IPC_TRACE_FILTER_UPDATE		SOF_CMD_TYPE(0x004) /**< ABI3.17 */
+#define SOF_IPC_TRACE_DMA_FREE		SOF_CMD_TYPE(0x005) /**< ABI3.20 */
 
 /* debug */
 #define SOF_IPC_DEBUG_MEM_USAGE			SOF_CMD_TYPE(0x001)
-- 
cgit v1.2.3


From 02d6fdecb9c38de19065f6bed8d5214556fd061d Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Thu, 4 Nov 2021 16:00:40 +0100
Subject: regmap: allow to define reg_update_bits for no bus configuration

Some device requires a special handling for reg_update_bits and can't use
the normal regmap read write logic. An example is when locking is
handled by the device and rmw operations requires to do atomic operations.
Allow to declare a dedicated function in regmap_config for
reg_update_bits in no bus configuration.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Link: https://lore.kernel.org/r/20211104150040.1260-1-ansuelsmth@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regmap.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index e3c9a25a853a..22652e5fbc38 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -290,6 +290,11 @@ typedef void (*regmap_unlock)(void *);
  *		  read operation on a bus such as SPI, I2C, etc. Most of the
  *		  devices do not need this.
  * @reg_write:	  Same as above for writing.
+ * @reg_update_bits: Optional callback that if filled will be used to perform
+ *		     all the update_bits(rmw) operation. Should only be provided
+ *		     if the function require special handling with lock and reg
+ *		     handling and the operation cannot be represented as a simple
+ *		     update_bits operation on a bus such as SPI, I2C, etc.
  * @fast_io:	  Register IO is fast. Use a spinlock instead of a mutex
  *	     	  to perform locking. This field is ignored if custom lock/unlock
  *	     	  functions are used (see fields lock/unlock of struct regmap_config).
@@ -372,6 +377,8 @@ struct regmap_config {
 
 	int (*reg_read)(void *context, unsigned int reg, unsigned int *val);
 	int (*reg_write)(void *context, unsigned int reg, unsigned int val);
+	int (*reg_update_bits)(void *context, unsigned int reg,
+			       unsigned int mask, unsigned int val);
 
 	bool fast_io;
 
-- 
cgit v1.2.3


From 45971bdd8ca8b5a99a49f4db86737401c45e246f Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Tue, 2 Nov 2021 16:02:02 -0600
Subject: spi: remove unused header file <linux/platform_data/spi-clps711x.h>

Commit 6acaadc852f1 ("spi: clps711x: Driver refactor") removed the only use
of <linux/platform_data/spi-clps711x.h>, but left the header file behind.
This file is unused, delete it.

Cc: Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20211102220203.940290-9-corbet@lwn.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/platform_data/spi-clps711x.h | 17 -----------------
 1 file changed, 17 deletions(-)
 delete mode 100644 include/linux/platform_data/spi-clps711x.h

(limited to 'include')

diff --git a/include/linux/platform_data/spi-clps711x.h b/include/linux/platform_data/spi-clps711x.h
deleted file mode 100644
index efaa596848c9..000000000000
--- a/include/linux/platform_data/spi-clps711x.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- *  CLPS711X SPI bus driver definitions
- *
- *  Copyright (C) 2012 Alexander Shiyan <shc_work@mail.ru>
- */
-
-#ifndef ____LINUX_PLATFORM_DATA_SPI_CLPS711X_H
-#define ____LINUX_PLATFORM_DATA_SPI_CLPS711X_H
-
-/* Board specific platform_data */
-struct spi_clps711x_pdata {
-	int *chipselect;	/* Array of GPIO-numbers */
-	int num_chipselect;	/* Total count of GPIOs */
-};
-
-#endif
-- 
cgit v1.2.3


From a0ddee65c527d877e798205c1391c6170e580c66 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 12 Nov 2021 16:07:49 +0200
Subject: printk: Remove printk.h inclusion in percpu.h

After the commit 42a0bb3f7138 ("printk/nmi: generic solution for safe
printk in NMI") the printk.h is not needed anymore in percpu.h.

Moreover `make headerdep` complains (an excerpt)

In file included from linux/printk.h,
                 from linux/dynamic_debug.h:188
                 from linux/printk.h:559 <-- here
                 from linux/percpu.h:9
                 from linux/idr.h:17
include/net/9p/client.h:13: warning: recursive header inclusion

Yeah, it's not a root cause of this, but removing will help to reduce
the noise.

Fixes: 42a0bb3f7138 ("printk/nmi: generic solution for safe printk in NMI")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Dennis Zhou <dennis@kernel.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20211112140749.80042-1-andriy.shevchenko@linux.intel.com
---
 include/linux/percpu.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 5e76af742c80..4fa3000f9c22 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -6,7 +6,6 @@
 #include <linux/preempt.h>
 #include <linux/smp.h>
 #include <linux/cpumask.h>
-#include <linux/printk.h>
 #include <linux/pfn.h>
 #include <linux/init.h>
 
-- 
cgit v1.2.3


From 13cae4a104d2b7205696229ba85d34cc035f8c84 Mon Sep 17 00:00:00 2001
From: Matt Johnston <matt@codeconstruct.com.au>
Date: Mon, 15 Nov 2021 10:49:21 +0800
Subject: i2c: core: Allow 255 byte transfers for SMBus 3.x

SMBus 3.0 increased the maximum block transfer size from 32 bytes to
255 bytes. We increase the size of struct i2c_smbus_data's block[]
member.

i2c_smbus_xfer() and i2c_smbus_xfer_emulated() now support 255 byte
block operations, other block functions remain limited to 32 bytes for
compatibility with existing callers.

We allow adapters to indicate support for the larger size with
I2C_FUNC_SMBUS_V3_BLOCK. Most emulated drivers should be able to use 255
byte blocks by replacing I2C_SMBUS_BLOCK_MAX with I2C_SMBUS_V3_BLOCK_MAX
though some will have hardware limitations that need testing.

Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/i2c.h      | 13 +++++++++++++
 include/uapi/linux/i2c.h |  5 ++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 16119ac1aa97..353d6b4e7a53 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -52,6 +52,19 @@ typedef int (*i2c_slave_cb_t)(struct i2c_client *client,
 struct module;
 struct property_entry;
 
+/* SMBus 3.0 extends the maximum block read/write size to 255 (from 32).
+ * The larger size is only supported by some drivers, indicated by
+ * the I2C_FUNC_SMBUS_V3_BLOCK functionality bit.
+ */
+#define I2C_SMBUS_V3_BLOCK_MAX	255	/* As specified in SMBus 3.0 standard */
+
+/* Note compatibility definition in uapi header with 32 byte block */
+union i2c_smbus_data {
+	__u8 byte;
+	__u16 word;
+	__u8 block[I2C_SMBUS_V3_BLOCK_MAX + 1]; /* block[0] is used for length */
+};
+
 #if IS_ENABLED(CONFIG_I2C)
 /* Return the Frequency mode string based on the bus frequency */
 const char *i2c_freq_mode_string(u32 bus_freq_hz);
diff --git a/include/uapi/linux/i2c.h b/include/uapi/linux/i2c.h
index 92326ebde350..7b7d90b50cf0 100644
--- a/include/uapi/linux/i2c.h
+++ b/include/uapi/linux/i2c.h
@@ -108,6 +108,7 @@ struct i2c_msg {
 #define I2C_FUNC_SMBUS_READ_I2C_BLOCK	0x04000000 /* I2C-like block xfer  */
 #define I2C_FUNC_SMBUS_WRITE_I2C_BLOCK	0x08000000 /* w/ 1-byte reg. addr. */
 #define I2C_FUNC_SMBUS_HOST_NOTIFY	0x10000000 /* SMBus 2.0 or later */
+#define I2C_FUNC_SMBUS_V3_BLOCK		0x20000000 /* Device supports 255 byte block */
 
 #define I2C_FUNC_SMBUS_BYTE		(I2C_FUNC_SMBUS_READ_BYTE | \
 					 I2C_FUNC_SMBUS_WRITE_BYTE)
@@ -137,13 +138,15 @@ struct i2c_msg {
 /*
  * Data for SMBus Messages
  */
-#define I2C_SMBUS_BLOCK_MAX	32	/* As specified in SMBus standard */
+#define I2C_SMBUS_BLOCK_MAX	32	/* As specified in SMBus 2.0 standard */
+#ifndef __KERNEL__
 union i2c_smbus_data {
 	__u8 byte;
 	__u16 word;
 	__u8 block[I2C_SMBUS_BLOCK_MAX + 2]; /* block[0] is used for length */
 			       /* and one more for user-space compatibility */
 };
+#endif
 
 /* i2c_smbus_xfer read or write markers */
 #define I2C_SMBUS_READ	1
-- 
cgit v1.2.3


From 84a107e68b34217eff536e81a6a6f419ee0d0f7e Mon Sep 17 00:00:00 2001
From: Matt Johnston <matt@codeconstruct.com.au>
Date: Mon, 15 Nov 2021 10:49:22 +0800
Subject: i2c: dev: Handle 255 byte blocks for i2c ioctl

I2C_SMBUS is limited to 32 bytes due to compatibility with the
32 byte i2c_smbus_data.block

I2C_RDWR allows larger transfers if sufficient sized buffers are passed.

Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/i2c-dev.h | 2 ++
 include/uapi/linux/i2c.h     | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/i2c-dev.h b/include/uapi/linux/i2c-dev.h
index 1c4cec4ddd84..46ce31d42f7d 100644
--- a/include/uapi/linux/i2c-dev.h
+++ b/include/uapi/linux/i2c-dev.h
@@ -39,12 +39,14 @@
 
 
 /* This is the structure as used in the I2C_SMBUS ioctl call */
+#ifndef __KERNEL__
 struct i2c_smbus_ioctl_data {
 	__u8 read_write;
 	__u8 command;
 	__u32 size;
 	union i2c_smbus_data __user *data;
 };
+#endif
 
 /* This is the structure as used in the I2C_RDWR ioctl call */
 struct i2c_rdwr_ioctl_data {
diff --git a/include/uapi/linux/i2c.h b/include/uapi/linux/i2c.h
index 7b7d90b50cf0..c3534ab1ae53 100644
--- a/include/uapi/linux/i2c.h
+++ b/include/uapi/linux/i2c.h
@@ -109,6 +109,8 @@ struct i2c_msg {
 #define I2C_FUNC_SMBUS_WRITE_I2C_BLOCK	0x08000000 /* w/ 1-byte reg. addr. */
 #define I2C_FUNC_SMBUS_HOST_NOTIFY	0x10000000 /* SMBus 2.0 or later */
 #define I2C_FUNC_SMBUS_V3_BLOCK		0x20000000 /* Device supports 255 byte block */
+						   /* Note that I2C_SMBUS ioctl only */
+						   /* supports a 32 byte block */
 
 #define I2C_FUNC_SMBUS_BYTE		(I2C_FUNC_SMBUS_READ_BYTE | \
 					 I2C_FUNC_SMBUS_WRITE_BYTE)
-- 
cgit v1.2.3


From 34ae2c09d46a2d0abd907e139b466f798e4095a8 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Mon, 15 Nov 2021 10:00:27 +0000
Subject: net: phylink: add generic validate implementation

Add a generic validate() implementation using the supported_interfaces
and a bitmask of MAC pause/speed/duplex capabilities. This allows us
to entirely eliminate many driver private validate() implementations.

We expose the underlying phylink_get_linkmodes() function so that
drivers which have special needs can still benefit from conversion.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phylink.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'include')

diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index f037470b6fb3..3563820a1765 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -20,6 +20,29 @@ enum {
 	MLO_AN_PHY = 0,	/* Conventional PHY */
 	MLO_AN_FIXED,	/* Fixed-link mode */
 	MLO_AN_INBAND,	/* In-band protocol */
+
+	MAC_SYM_PAUSE	= BIT(0),
+	MAC_ASYM_PAUSE	= BIT(1),
+	MAC_10HD	= BIT(2),
+	MAC_10FD	= BIT(3),
+	MAC_10		= MAC_10HD | MAC_10FD,
+	MAC_100HD	= BIT(4),
+	MAC_100FD	= BIT(5),
+	MAC_100		= MAC_100HD | MAC_100FD,
+	MAC_1000HD	= BIT(6),
+	MAC_1000FD	= BIT(7),
+	MAC_1000	= MAC_1000HD | MAC_1000FD,
+	MAC_2500FD	= BIT(8),
+	MAC_5000FD	= BIT(9),
+	MAC_10000FD	= BIT(10),
+	MAC_20000FD	= BIT(11),
+	MAC_25000FD	= BIT(12),
+	MAC_40000FD	= BIT(13),
+	MAC_50000FD	= BIT(14),
+	MAC_56000FD	= BIT(15),
+	MAC_100000FD	= BIT(16),
+	MAC_200000FD	= BIT(17),
+	MAC_400000FD	= BIT(18),
 };
 
 static inline bool phylink_autoneg_inband(unsigned int mode)
@@ -69,6 +92,7 @@ enum phylink_op_type {
  *		     if MAC link is at %MLO_AN_FIXED mode.
  * @supported_interfaces: bitmap describing which PHY_INTERFACE_MODE_xxx
  *                        are supported by the MAC/PCS.
+ * @mac_capabilities: MAC pause/speed/duplex capabilities.
  */
 struct phylink_config {
 	struct device *dev;
@@ -79,6 +103,7 @@ struct phylink_config {
 	void (*get_fixed_state)(struct phylink_config *config,
 				struct phylink_link_state *state);
 	DECLARE_PHY_INTERFACE_MASK(supported_interfaces);
+	unsigned long mac_capabilities;
 };
 
 /**
@@ -442,6 +467,12 @@ void pcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
 		 phy_interface_t interface, int speed, int duplex);
 #endif
 
+void phylink_get_linkmodes(unsigned long *linkmodes, phy_interface_t interface,
+			   unsigned long mac_capabilities);
+void phylink_generic_validate(struct phylink_config *config,
+			      unsigned long *supported,
+			      struct phylink_link_state *state);
+
 struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *,
 			       phy_interface_t iface,
 			       const struct phylink_mac_ops *mac_ops);
-- 
cgit v1.2.3


From 2f6a470d6545841cf1891b87e360d3998ef024c8 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 15 Nov 2021 07:49:46 -0800
Subject: Revert "Merge branch 'mctp-i2c-driver'"

This reverts commit 71812af7234f30362b43ccff33f93890ae4c0655, reversing
changes made to cc0be1ad686fb29a4d127948486f40b17fb34b50.

Wolfram Sang says:

Please revert. Besides the driver in net, it modifies the I2C core
code. This has not been acked by the I2C maintainer (in this case me).
So, please don't pull this in via the net tree. The question raised here
(extending SMBus calls to 255 byte) is complicated because we need ABI
backwards compatibility.

Link: https://lore.kernel.org/all/YZJ9H4eM%2FM7OXVN0@shikoro/
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/i2c.h          | 13 -------------
 include/uapi/linux/i2c-dev.h |  2 --
 include/uapi/linux/i2c.h     |  7 +------
 3 files changed, 1 insertion(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 353d6b4e7a53..16119ac1aa97 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -52,19 +52,6 @@ typedef int (*i2c_slave_cb_t)(struct i2c_client *client,
 struct module;
 struct property_entry;
 
-/* SMBus 3.0 extends the maximum block read/write size to 255 (from 32).
- * The larger size is only supported by some drivers, indicated by
- * the I2C_FUNC_SMBUS_V3_BLOCK functionality bit.
- */
-#define I2C_SMBUS_V3_BLOCK_MAX	255	/* As specified in SMBus 3.0 standard */
-
-/* Note compatibility definition in uapi header with 32 byte block */
-union i2c_smbus_data {
-	__u8 byte;
-	__u16 word;
-	__u8 block[I2C_SMBUS_V3_BLOCK_MAX + 1]; /* block[0] is used for length */
-};
-
 #if IS_ENABLED(CONFIG_I2C)
 /* Return the Frequency mode string based on the bus frequency */
 const char *i2c_freq_mode_string(u32 bus_freq_hz);
diff --git a/include/uapi/linux/i2c-dev.h b/include/uapi/linux/i2c-dev.h
index 46ce31d42f7d..1c4cec4ddd84 100644
--- a/include/uapi/linux/i2c-dev.h
+++ b/include/uapi/linux/i2c-dev.h
@@ -39,14 +39,12 @@
 
 
 /* This is the structure as used in the I2C_SMBUS ioctl call */
-#ifndef __KERNEL__
 struct i2c_smbus_ioctl_data {
 	__u8 read_write;
 	__u8 command;
 	__u32 size;
 	union i2c_smbus_data __user *data;
 };
-#endif
 
 /* This is the structure as used in the I2C_RDWR ioctl call */
 struct i2c_rdwr_ioctl_data {
diff --git a/include/uapi/linux/i2c.h b/include/uapi/linux/i2c.h
index c3534ab1ae53..92326ebde350 100644
--- a/include/uapi/linux/i2c.h
+++ b/include/uapi/linux/i2c.h
@@ -108,9 +108,6 @@ struct i2c_msg {
 #define I2C_FUNC_SMBUS_READ_I2C_BLOCK	0x04000000 /* I2C-like block xfer  */
 #define I2C_FUNC_SMBUS_WRITE_I2C_BLOCK	0x08000000 /* w/ 1-byte reg. addr. */
 #define I2C_FUNC_SMBUS_HOST_NOTIFY	0x10000000 /* SMBus 2.0 or later */
-#define I2C_FUNC_SMBUS_V3_BLOCK		0x20000000 /* Device supports 255 byte block */
-						   /* Note that I2C_SMBUS ioctl only */
-						   /* supports a 32 byte block */
 
 #define I2C_FUNC_SMBUS_BYTE		(I2C_FUNC_SMBUS_READ_BYTE | \
 					 I2C_FUNC_SMBUS_WRITE_BYTE)
@@ -140,15 +137,13 @@ struct i2c_msg {
 /*
  * Data for SMBus Messages
  */
-#define I2C_SMBUS_BLOCK_MAX	32	/* As specified in SMBus 2.0 standard */
-#ifndef __KERNEL__
+#define I2C_SMBUS_BLOCK_MAX	32	/* As specified in SMBus standard */
 union i2c_smbus_data {
 	__u8 byte;
 	__u16 word;
 	__u8 block[I2C_SMBUS_BLOCK_MAX + 2]; /* block[0] is used for length */
 			       /* and one more for user-space compatibility */
 };
-#endif
 
 /* i2c_smbus_xfer read or write markers */
 #define I2C_SMBUS_READ	1
-- 
cgit v1.2.3


From ce6838afc9244171cd07620bbb82e18695c491e9 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 15 Nov 2021 13:53:12 +0200
Subject: agp/intel-gtt: Replace kernel.h with the necessary inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/4d6a976459547407979f4b4c05a52785523e6bd8.1636977089.git.jani.nikula@intel.com
---
 include/drm/intel-gtt.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index abfefaaf897a..4e5f8e7e25d0 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -6,7 +6,10 @@
 
 #include <linux/agp_backend.h>
 #include <linux/intel-iommu.h>
-#include <linux/kernel.h>
+#include <linux/types.h>
+
+struct pci_dev;
+struct sg_table;
 
 void intel_gtt_get(u64 *gtt_total,
 		   phys_addr_t *mappable_base,
-- 
cgit v1.2.3


From 7e78153aef7f9efcb935487402151de31e0836ad Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Mon, 15 Nov 2021 13:53:13 +0200
Subject: agp/intel-gtt: reduce intel-gtt dependencies more

Don't include stuff on behalf of users if they're not strictly necessary
for the header.

Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/7bcaa1684587b9b008d3c41468fb40e63c54fbc7.1636977089.git.jani.nikula@intel.com
---
 include/drm/intel-gtt.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index 4e5f8e7e25d0..67530bfef129 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -4,10 +4,9 @@
 #ifndef _DRM_INTEL_GTT_H
 #define	_DRM_INTEL_GTT_H
 
-#include <linux/agp_backend.h>
-#include <linux/intel-iommu.h>
 #include <linux/types.h>
 
+struct agp_bridge_data;
 struct pci_dev;
 struct sg_table;
 
-- 
cgit v1.2.3


From f64bd790b750dd281406964af40d16adfc88a074 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 29 Oct 2021 12:51:32 -0700
Subject: ACPI: Keep sub-table parsing infrastructure available for modules

The NFIT driver and now the CXL ACPI driver have both open-coded ACPI
table parsing. Before another instance is added arrange for the core
ACPI sub-table parsing to be optionally available to drivers via the
CONFIG_ACPI_TABLE_LIB symbol. If no drivers select the symbol then the
infrastructure reverts back to being tagged __init via the
__init_or_acpilib annotation.

For now, only tag the core sub-table routines and data that the CEDT parsing in
the cxl_acpi driver would want to reuse, a CEDT parsing helper is added
in a later change.

Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Len Brown <lenb@kernel.org>
Cc: Alison Schofield <alison.schofield@intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/r/163553709227.2509508.8215196520233473814.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/acpi.h | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 143ce7e0bee1..edfa3c8f3562 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -232,14 +232,22 @@ int acpi_locate_initial_tables (void);
 void acpi_reserve_initial_tables (void);
 void acpi_table_init_complete (void);
 int acpi_table_init (void);
+
+#ifdef CONFIG_ACPI_TABLE_LIB
+#define __init_or_acpilib
+#define __initdata_or_acpilib
+#else
+#define __init_or_acpilib __init
+#define __initdata_or_acpilib __initdata
+#endif
+
 int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
-int __init acpi_table_parse_entries(char *id, unsigned long table_size,
-			      int entry_id,
-			      acpi_tbl_entry_handler handler,
-			      unsigned int max_entries);
-int __init acpi_table_parse_entries_array(char *id, unsigned long table_size,
-			      struct acpi_subtable_proc *proc, int proc_num,
-			      unsigned int max_entries);
+int __init_or_acpilib acpi_table_parse_entries(char *id,
+		unsigned long table_size, int entry_id,
+		acpi_tbl_entry_handler handler, unsigned int max_entries);
+int __init_or_acpilib acpi_table_parse_entries_array(char *id,
+		unsigned long table_size, struct acpi_subtable_proc *proc,
+		int proc_num, unsigned int max_entries);
 int acpi_table_parse_madt(enum acpi_madt_type id,
 			  acpi_tbl_entry_handler handler,
 			  unsigned int max_entries);
-- 
cgit v1.2.3


From ad2f63971e9655e3987db32dac85aa50658790eb Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 29 Oct 2021 12:51:37 -0700
Subject: ACPI: Teach ACPI table parsing about the CEDT header format

The CEDT adds yet one more unique subtable header type where the length
is a 16-bit value. Extend the subtable helpers to detect this scenario.

Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Len Brown <lenb@kernel.org>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/r/163553709742.2509508.5177761945441327574.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/acpi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index edfa3c8f3562..6b7f181d51e2 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -133,6 +133,7 @@ union acpi_subtable_headers {
 	struct acpi_subtable_header common;
 	struct acpi_hmat_structure hmat;
 	struct acpi_prmt_module_header prmt;
+	struct acpi_cedt_header cedt;
 };
 
 typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table);
-- 
cgit v1.2.3


From 2d03e46a4bad20191d07b83ec1242d5f002577be Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 29 Oct 2021 12:51:42 -0700
Subject: ACPI: Add a context argument for table parsing handlers

In preparation for drivers reusing the core table parsing
infrastructure, arrange for handlers to take a context argument. This
allows driver table parsing to wrap ACPI table entries in
driver-specific data.

The first consumer of this infrastructure is the CEDT parsing that
happens in the cxl_acpi driver, add a conditional
(CONFIG_ACPI_TABLE_LIB=y) export of a acpi_table_parse_cedt() helper for
this case.

Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Len Brown <lenb@kernel.org>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/r/163553710257.2509508.14310494417463866020.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/acpi.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6b7f181d51e2..95f88108f664 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -141,6 +141,9 @@ typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table);
 typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header,
 				      const unsigned long end);
 
+typedef int (*acpi_tbl_entry_handler_arg)(union acpi_subtable_headers *header,
+					  void *arg, const unsigned long end);
+
 /* Debugger support */
 
 struct acpi_debugger_ops {
@@ -217,6 +220,8 @@ static inline int acpi_debugger_notify_command_complete(void)
 struct acpi_subtable_proc {
 	int id;
 	acpi_tbl_entry_handler handler;
+	acpi_tbl_entry_handler_arg handler_arg;
+	void *arg;
 	int count;
 };
 
@@ -235,9 +240,11 @@ void acpi_table_init_complete (void);
 int acpi_table_init (void);
 
 #ifdef CONFIG_ACPI_TABLE_LIB
+#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI)
 #define __init_or_acpilib
 #define __initdata_or_acpilib
 #else
+#define EXPORT_SYMBOL_ACPI_LIB(x)
 #define __init_or_acpilib __init
 #define __initdata_or_acpilib __initdata
 #endif
@@ -252,6 +259,10 @@ int __init_or_acpilib acpi_table_parse_entries_array(char *id,
 int acpi_table_parse_madt(enum acpi_madt_type id,
 			  acpi_tbl_entry_handler handler,
 			  unsigned int max_entries);
+int __init_or_acpilib
+acpi_table_parse_cedt(enum acpi_cedt_type id,
+		      acpi_tbl_entry_handler_arg handler_arg, void *arg);
+
 int acpi_parse_mcfg (struct acpi_table_header *header);
 void acpi_table_print_madt_entry (struct acpi_subtable_header *madt);
 
-- 
cgit v1.2.3


From 03b122da74b22fbe7cd98184fa5657a9ce13970c Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Tue, 26 Oct 2021 15:00:48 -0700
Subject: x86/sgx: Hook arch_memory_failure() into mainline code

Add a call inside memory_failure() to call the arch specific code
to check if the address is an SGX EPC page and handle it.

Note the SGX EPC pages do not have a "struct page" entry, so the hook
goes in at the same point as the device mapping hook.

Pull the call to acquire the mutex earlier so the SGX errors are also
protected.

Make set_mce_nospec() skip SGX pages when trying to adjust
the 1:1 map.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Reviewed-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Tested-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lkml.kernel.org/r/20211026220050.697075-6-tony.luck@intel.com
---
 include/linux/mm.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a7e4a9e7d807..57f1aa2a33b6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3231,6 +3231,19 @@ extern void shake_page(struct page *p);
 extern atomic_long_t num_poisoned_pages __read_mostly;
 extern int soft_offline_page(unsigned long pfn, int flags);
 
+#ifndef arch_memory_failure
+static inline int arch_memory_failure(unsigned long pfn, int flags)
+{
+	return -ENXIO;
+}
+#endif
+
+#ifndef arch_is_platform_page
+static inline bool arch_is_platform_page(u64 paddr)
+{
+	return false;
+}
+#endif
 
 /*
  * Error handlers for various types of pages.
-- 
cgit v1.2.3


From 749303055b78bc38ec0790ccc596cae235446367 Mon Sep 17 00:00:00 2001
From: Simon Trimmer <simont@opensource.cirrus.com>
Date: Mon, 15 Nov 2021 12:02:15 +0000
Subject: firmware: cs_dsp: tidy includes in cs_dsp.c and cs_dsp.h

This patch removes unused included header files and moves others into
cs_dsp.h to ensure that types referenced in the header file are properly
described to prevent compiler warnings.

Signed-off-by: Simon Trimmer <simont@opensource.cirrus.com>
Acked-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20211115120215.56824-1-simont@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/firmware/cirrus/cs_dsp.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h
index 9ad9eaaaa552..3a54b1afc48f 100644
--- a/include/linux/firmware/cirrus/cs_dsp.h
+++ b/include/linux/firmware/cirrus/cs_dsp.h
@@ -11,6 +11,11 @@
 #ifndef __CS_DSP_H
 #define __CS_DSP_H
 
+#include <linux/device.h>
+#include <linux/firmware.h>
+#include <linux/list.h>
+#include <linux/regmap.h>
+
 #define CS_ADSP2_REGION_0 BIT(0)
 #define CS_ADSP2_REGION_1 BIT(1)
 #define CS_ADSP2_REGION_2 BIT(2)
-- 
cgit v1.2.3


From e9380df851878cee71df5a1c7611584421527f7e Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Sun, 31 Oct 2021 20:48:52 -0500
Subject: ACPI: Add stubs for wakeup handler functions

The commit ddfd9dcf270c ("ACPI: PM: Add acpi_[un]register_wakeup_handler()")
added new functions for drivers to use during the s2idle wakeup path, but
didn't add stubs for when CONFIG_ACPI wasn't set.

Add those stubs in for other drivers to be able to use.

Fixes: ddfd9dcf270c ("ACPI: PM: Add acpi_[un]register_wakeup_handler()")
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://lore.kernel.org/r/20211101014853.6177-1-mario.limonciello@amd.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/acpi.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 143ce7e0bee1..668d007f0917 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -974,6 +974,15 @@ static inline int acpi_get_local_address(acpi_handle handle, u32 *addr)
 	return -ENODEV;
 }
 
+static inline int acpi_register_wakeup_handler(int wake_irq,
+	bool (*wakeup)(void *context), void *context)
+{
+	return -ENXIO;
+}
+
+static inline void acpi_unregister_wakeup_handler(
+	bool (*wakeup)(void *context), void *context) { }
+
 #endif	/* !CONFIG_ACPI */
 
 #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
-- 
cgit v1.2.3


From a3143f7822a9eeb38f0e046080ae8f79f6c7122d Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Tue, 2 Nov 2021 16:01:58 -0600
Subject: Remove unused header <linux/sdb.h>

Commit 6a80b30086b8 ("fmc: Delete the FMC subsystem") removed the last user
of <linux/sdb.h>, but left the header file behind.  Nothing uses this file,
delete it now.

Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Alessandro Rubini <rubini@gnudd.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Acked-by: Alessandro Rubini <rubini@gnudd.com>
Link: https://lore.kernel.org/r/20211102220203.940290-5-corbet@lwn.net
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/sdb.h | 160 ----------------------------------------------------
 1 file changed, 160 deletions(-)
 delete mode 100644 include/linux/sdb.h

(limited to 'include')

diff --git a/include/linux/sdb.h b/include/linux/sdb.h
deleted file mode 100644
index a2404a2bbd10..000000000000
--- a/include/linux/sdb.h
+++ /dev/null
@@ -1,160 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This is the official version 1.1 of sdb.h
- */
-#ifndef __SDB_H__
-#define __SDB_H__
-#ifdef __KERNEL__
-#include <linux/types.h>
-#else
-#include <stdint.h>
-#endif
-
-/*
- * All structures are 64 bytes long and are expected
- * to live in an array, one for each interconnect.
- * Most fields of the structures are shared among the
- * various types, and most-specific fields are at the
- * beginning (for alignment reasons, and to keep the
- * magic number at the head of the interconnect record
- */
-
-/* Product, 40 bytes at offset 24, 8-byte aligned
- *
- * device_id is vendor-assigned; version is device-specific,
- * date is hex (e.g 0x20120501), name is UTF-8, blank-filled
- * and not terminated with a 0 byte.
- */
-struct sdb_product {
-	uint64_t		vendor_id;	/* 0x18..0x1f */
-	uint32_t		device_id;	/* 0x20..0x23 */
-	uint32_t		version;	/* 0x24..0x27 */
-	uint32_t		date;		/* 0x28..0x2b */
-	uint8_t			name[19];	/* 0x2c..0x3e */
-	uint8_t			record_type;	/* 0x3f */
-};
-
-/*
- * Component, 56 bytes at offset 8, 8-byte aligned
- *
- * The address range is first to last, inclusive
- * (for example 0x100000 - 0x10ffff)
- */
-struct sdb_component {
-	uint64_t		addr_first;	/* 0x08..0x0f */
-	uint64_t		addr_last;	/* 0x10..0x17 */
-	struct sdb_product	product;	/* 0x18..0x3f */
-};
-
-/* Type of the SDB record */
-enum sdb_record_type {
-	sdb_type_interconnect	= 0x00,
-	sdb_type_device		= 0x01,
-	sdb_type_bridge		= 0x02,
-	sdb_type_integration	= 0x80,
-	sdb_type_repo_url	= 0x81,
-	sdb_type_synthesis	= 0x82,
-	sdb_type_empty		= 0xFF,
-};
-
-/* Type 0: interconnect (first of the array)
- *
- * sdb_records is the length of the table including this first
- * record, version is 1. The bus type is enumerated later.
- */
-#define				SDB_MAGIC	0x5344422d /* "SDB-" */
-struct sdb_interconnect {
-	uint32_t		sdb_magic;	/* 0x00-0x03 */
-	uint16_t		sdb_records;	/* 0x04-0x05 */
-	uint8_t			sdb_version;	/* 0x06 */
-	uint8_t			sdb_bus_type;	/* 0x07 */
-	struct sdb_component	sdb_component;	/* 0x08-0x3f */
-};
-
-/* Type 1: device
- *
- * class is 0 for "custom device", other values are
- * to be standardized; ABI version is for the driver,
- * bus-specific bits are defined by each bus (see below)
- */
-struct sdb_device {
-	uint16_t		abi_class;	/* 0x00-0x01 */
-	uint8_t			abi_ver_major;	/* 0x02 */
-	uint8_t			abi_ver_minor;	/* 0x03 */
-	uint32_t		bus_specific;	/* 0x04-0x07 */
-	struct sdb_component	sdb_component;	/* 0x08-0x3f */
-};
-
-/* Type 2: bridge
- *
- * child is the address of the nested SDB table
- */
-struct sdb_bridge {
-	uint64_t		sdb_child;	/* 0x00-0x07 */
-	struct sdb_component	sdb_component;	/* 0x08-0x3f */
-};
-
-/* Type 0x80: integration
- *
- * all types with bit 7 set are meta-information, so
- * software can ignore the types it doesn't know. Here we
- * just provide product information for an aggregate device
- */
-struct sdb_integration {
-	uint8_t			reserved[24];	/* 0x00-0x17 */
-	struct sdb_product	product;	/* 0x08-0x3f */
-};
-
-/* Type 0x81: Top module repository url
- *
- * again, an informative field that software can ignore
- */
-struct sdb_repo_url {
-	uint8_t			repo_url[63];	/* 0x00-0x3e */
-	uint8_t			record_type;	/* 0x3f */
-};
-
-/* Type 0x82: Synthesis tool information
- *
- * this informative record
- */
-struct sdb_synthesis {
-	uint8_t			syn_name[16];	/* 0x00-0x0f */
-	uint8_t			commit_id[16];	/* 0x10-0x1f */
-	uint8_t			tool_name[8];	/* 0x20-0x27 */
-	uint32_t		tool_version;	/* 0x28-0x2b */
-	uint32_t		date;		/* 0x2c-0x2f */
-	uint8_t			user_name[15];	/* 0x30-0x3e */
-	uint8_t			record_type;	/* 0x3f */
-};
-
-/* Type 0xff: empty
- *
- * this allows keeping empty slots during development,
- * so they can be filled later with minimal efforts and
- * no misleading description is ever shipped -- hopefully.
- * It can also be used to pad a table to a desired length.
- */
-struct sdb_empty {
-	uint8_t			reserved[63];	/* 0x00-0x3e */
-	uint8_t			record_type;	/* 0x3f */
-};
-
-/* The type of bus, for bus-specific flags */
-enum sdb_bus_type {
-	sdb_wishbone = 0x00,
-	sdb_data     = 0x01,
-};
-
-#define SDB_WB_WIDTH_MASK	0x0f
-#define SDB_WB_ACCESS8			0x01
-#define SDB_WB_ACCESS16			0x02
-#define SDB_WB_ACCESS32			0x04
-#define SDB_WB_ACCESS64			0x08
-#define SDB_WB_LITTLE_ENDIAN	0x80
-
-#define SDB_DATA_READ		0x04
-#define SDB_DATA_WRITE		0x02
-#define SDB_DATA_EXEC		0x01
-
-#endif /* __SDB_H__ */
-- 
cgit v1.2.3


From 353050be4c19e102178ccc05988101887c25ae53 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 9 Nov 2021 18:48:08 +0000
Subject: bpf: Fix toctou on read-only map's constant scalar tracking

Commit a23740ec43ba ("bpf: Track contents of read-only maps as scalars") is
checking whether maps are read-only both from BPF program side and user space
side, and then, given their content is constant, reading out their data via
map->ops->map_direct_value_addr() which is then subsequently used as known
scalar value for the register, that is, it is marked as __mark_reg_known()
with the read value at verification time. Before a23740ec43ba, the register
content was marked as an unknown scalar so the verifier could not make any
assumptions about the map content.

The current implementation however is prone to a TOCTOU race, meaning, the
value read as known scalar for the register is not guaranteed to be exactly
the same at a later point when the program is executed, and as such, the
prior made assumptions of the verifier with regards to the program will be
invalid which can cause issues such as OOB access, etc.

While the BPF_F_RDONLY_PROG map flag is always fixed and required to be
specified at map creation time, the map->frozen property is initially set to
false for the map given the map value needs to be populated, e.g. for global
data sections. Once complete, the loader "freezes" the map from user space
such that no subsequent updates/deletes are possible anymore. For the rest
of the lifetime of the map, this freeze one-time trigger cannot be undone
anymore after a successful BPF_MAP_FREEZE cmd return. Meaning, any new BPF_*
cmd calls which would update/delete map entries will be rejected with -EPERM
since map_get_sys_perms() removes the FMODE_CAN_WRITE permission. This also
means that pending update/delete map entries must still complete before this
guarantee is given. This corner case is not an issue for loaders since they
create and prepare such program private map in successive steps.

However, a malicious user is able to trigger this TOCTOU race in two different
ways: i) via userfaultfd, and ii) via batched updates. For i) userfaultfd is
used to expand the competition interval, so that map_update_elem() can modify
the contents of the map after map_freeze() and bpf_prog_load() were executed.
This works, because userfaultfd halts the parallel thread which triggered a
map_update_elem() at the time where we copy key/value from the user buffer and
this already passed the FMODE_CAN_WRITE capability test given at that time the
map was not "frozen". Then, the main thread performs the map_freeze() and
bpf_prog_load(), and once that had completed successfully, the other thread
is woken up to complete the pending map_update_elem() which then changes the
map content. For ii) the idea of the batched update is similar, meaning, when
there are a large number of updates to be processed, it can increase the
competition interval between the two. It is therefore possible in practice to
modify the contents of the map after executing map_freeze() and bpf_prog_load().

One way to fix both i) and ii) at the same time is to expand the use of the
map's map->writecnt. The latter was introduced in fc9702273e2e ("bpf: Add mmap()
support for BPF_MAP_TYPE_ARRAY") and further refined in 1f6cb19be2e2 ("bpf:
Prevent re-mmap()'ing BPF map as writable for initially r/o mapping") with
the rationale to make a writable mmap()'ing of a map mutually exclusive with
read-only freezing. The counter indicates writable mmap() mappings and then
prevents/fails the freeze operation. Its semantics can be expanded beyond
just mmap() by generally indicating ongoing write phases. This would essentially
span any parallel regular and batched flavor of update/delete operation and
then also have map_freeze() fail with -EBUSY. For the check_mem_access() in
the verifier we expand upon the bpf_map_is_rdonly() check ensuring that all
last pending writes have completed via bpf_map_write_active() test. Once the
map->frozen is set and bpf_map_write_active() indicates a map->writecnt of 0
only then we are really guaranteed to use the map's data as known constants.
For map->frozen being set and pending writes in process of still being completed
we fall back to marking that register as unknown scalar so we don't end up
making assumptions about it. With this, both TOCTOU reproducers from i) and
ii) are fixed.

Note that the map->writecnt has been converted into a atomic64 in the fix in
order to avoid a double freeze_mutex mutex_{un,}lock() pair when updating
map->writecnt in the various map update/delete BPF_* cmd flavors. Spanning
the freeze_mutex over entire map update/delete operations in syscall side
would not be possible due to then causing everything to be serialized.
Similarly, something like synchronize_rcu() after setting map->frozen to wait
for update/deletes to complete is not possible either since it would also
have to span the user copy which can sleep. On the libbpf side, this won't
break d66562fba1ce ("libbpf: Add BPF object skeleton support") as the
anonymous mmap()-ed "map initialization image" is remapped as a BPF map-backed
mmap()-ed memory where for .rodata it's non-writable.

Fixes: a23740ec43ba ("bpf: Track contents of read-only maps as scalars")
Reported-by: w1tcher.bupt@gmail.com
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f715e8863f4d..e7a163a3146b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -193,7 +193,7 @@ struct bpf_map {
 	atomic64_t usercnt;
 	struct work_struct work;
 	struct mutex freeze_mutex;
-	u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */
+	atomic64_t writecnt;
 };
 
 static inline bool map_value_has_spin_lock(const struct bpf_map *map)
@@ -1419,6 +1419,7 @@ void bpf_map_put(struct bpf_map *map);
 void *bpf_map_area_alloc(u64 size, int numa_node);
 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node);
 void bpf_map_area_free(void *base);
+bool bpf_map_write_active(const struct bpf_map *map);
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
 int  generic_map_lookup_batch(struct bpf_map *map,
 			      const union bpf_attr *attr,
-- 
cgit v1.2.3


From 7206998f578d5553989bc01ea2e544b622e79539 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 16 Nov 2021 08:24:59 +0100
Subject: ALSA: hda: Fix potential deadlock at codec unbinding

When a codec is unbound dynamically via sysfs while its stream is in
use, we may face a potential deadlock at the proc remove or a UAF.
This happens since the hda_pcm is managed by a linked list, as it
handles the hda_pcm object release via kref.

When a PCM is opened at the unbinding time, the release of hda_pcm
gets delayed and it ends up with the close of the PCM stream releasing
the associated hda_pcm object of its own.  The hda_pcm destructor
contains the PCM device release that includes the removal of procfs
entries.  And, this removal has the sync of the close of all in-use
files -- which would never finish because it's called from the PCM
file descriptor itself, i.e. it's trying to shoot its foot.

For addressing the deadlock above, this patch changes the way to
manage and release the hda_pcm object.  The kref of hda_pcm is
dropped, and instead a simple refcount is introduced in hda_codec for
keeping the track of the active PCM streams, and at each PCM open and
close, this refcount is adjusted accordingly.  At unbinding, the
driver calls snd_device_disconnect() for each PCM stream, then
synchronizes with the refcount finish, and finally releases the object
resources.

Fixes: bbbc7e8502c9 ("ALSA: hda - Allocate hda_pcm objects dynamically")
Link: https://lore.kernel.org/r/20211116072459.18930-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hda_codec.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h
index 0e45963bb767..82d9daa17851 100644
--- a/include/sound/hda_codec.h
+++ b/include/sound/hda_codec.h
@@ -8,7 +8,7 @@
 #ifndef __SOUND_HDA_CODEC_H
 #define __SOUND_HDA_CODEC_H
 
-#include <linux/kref.h>
+#include <linux/refcount.h>
 #include <linux/mod_devicetable.h>
 #include <sound/info.h>
 #include <sound/control.h>
@@ -166,8 +166,8 @@ struct hda_pcm {
 	bool own_chmap;		/* codec driver provides own channel maps */
 	/* private: */
 	struct hda_codec *codec;
-	struct kref kref;
 	struct list_head list;
+	unsigned int disconnected:1;
 };
 
 /* codec information */
@@ -187,6 +187,8 @@ struct hda_codec {
 
 	/* PCM to create, set by patch_ops.build_pcms callback */
 	struct list_head pcm_list_head;
+	refcount_t pcm_ref;
+	wait_queue_head_t remove_sleep;
 
 	/* codec specific info */
 	void *spec;
@@ -420,7 +422,7 @@ void snd_hda_codec_cleanup_for_unbind(struct hda_codec *codec);
 
 static inline void snd_hda_codec_pcm_get(struct hda_pcm *pcm)
 {
-	kref_get(&pcm->kref);
+	refcount_inc(&pcm->codec->pcm_ref);
 }
 void snd_hda_codec_pcm_put(struct hda_pcm *pcm);
 
-- 
cgit v1.2.3


From 2c95b92ecd92e784785b1db8cccc4f0f2bfa850c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 16 Nov 2021 08:33:58 +0100
Subject: ALSA: memalloc: Unify x86 SG-buffer handling (take#3)

This is a second attempt to unify the x86-specific SG-buffer handling
code with the new standard non-contiguous page handler.

The first try (in commit 2d9ea39917a4) failed due to the wrong page
and address calculations, hence reverted.  (And the second try failed
due to a copy&paste error.)  Now it's corrected with the previous fix
for noncontig pages, and the proper sg page iteration by this patch.

After the migration, SNDRV_DMA_TYPE_DMA_SG becomes identical with
SNDRV_DMA_TYPE_NONCONTIG on x86, while others still fall back to
SNDRV_DMA_TYPE_DEV.

Tested-by: Alex Xu (Hello71) <alex_y_xu@yahoo.ca>
Tested-by: Harald Arnesen <harald@skogtun.org>
Link: https://lore.kernel.org/r/20211017074859.24112-4-tiwai@suse.de
Link: https://lore.kernel.org/r/20211109062235.22310-1-tiwai@suse.de
Link: https://lore.kernel.org/r/20211116073358.19741-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/memalloc.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/sound/memalloc.h b/include/sound/memalloc.h
index 1051b84e8579..653dfffb3ac8 100644
--- a/include/sound/memalloc.h
+++ b/include/sound/memalloc.h
@@ -36,13 +36,6 @@ struct snd_dma_device {
 #define SNDRV_DMA_TYPE_CONTINUOUS	1	/* continuous no-DMA memory */
 #define SNDRV_DMA_TYPE_DEV		2	/* generic device continuous */
 #define SNDRV_DMA_TYPE_DEV_WC		5	/* continuous write-combined */
-#ifdef CONFIG_SND_DMA_SGBUF
-#define SNDRV_DMA_TYPE_DEV_SG		3	/* generic device SG-buffer */
-#define SNDRV_DMA_TYPE_DEV_WC_SG	6	/* SG write-combined */
-#else
-#define SNDRV_DMA_TYPE_DEV_SG	SNDRV_DMA_TYPE_DEV /* no SG-buf support */
-#define SNDRV_DMA_TYPE_DEV_WC_SG	SNDRV_DMA_TYPE_DEV_WC
-#endif
 #ifdef CONFIG_GENERIC_ALLOCATOR
 #define SNDRV_DMA_TYPE_DEV_IRAM		4	/* generic device iram-buffer */
 #else
@@ -51,6 +44,13 @@ struct snd_dma_device {
 #define SNDRV_DMA_TYPE_VMALLOC		7	/* vmalloc'ed buffer */
 #define SNDRV_DMA_TYPE_NONCONTIG	8	/* non-coherent SG buffer */
 #define SNDRV_DMA_TYPE_NONCOHERENT	9	/* non-coherent buffer */
+#ifdef CONFIG_SND_DMA_SGBUF
+#define SNDRV_DMA_TYPE_DEV_SG		SNDRV_DMA_TYPE_NONCONTIG
+#define SNDRV_DMA_TYPE_DEV_WC_SG	6	/* SG write-combined */
+#else
+#define SNDRV_DMA_TYPE_DEV_SG	SNDRV_DMA_TYPE_DEV /* no SG-buf support */
+#define SNDRV_DMA_TYPE_DEV_WC_SG	SNDRV_DMA_TYPE_DEV_WC
+#endif
 
 /*
  * info for buffer allocation
-- 
cgit v1.2.3


From 0f0ac158d28ff78e75c334e869b1cb8e69372a1f Mon Sep 17 00:00:00 2001
From: "Luke D. Jones" <luke@ljones.dev>
Date: Sun, 24 Oct 2021 16:37:05 +1300
Subject: platform/x86: asus-wmi: Add support for custom fan curves

Add support for custom fan curves found on some ASUS ROG laptops.

These laptops have the ability to set a custom curve for the CPU
and GPU fans via two ACPI methods.

This patch adds two pwm<N> attributes to the hwmon sysfs,
pwm1 for CPU fan, pwm2 for GPU fan. Both are under the hwmon of the
name `asus_custom_fan_curve`. There is no safety check of the set
fan curves - this must be done in userspace.

The fans have settings [1,2,3] under pwm<N>_enable:
1. Enable and write settings out
2. Disable and use factory fan mode
3. Same as 2, additionally restoring default factory curve.

Use of 2 means that the curve the user has set is still stored and
won't be erased, but the laptop will be using its default auto-fan
mode. Re-enabling the manual mode then activates the curves again.

Notes:
- pwm<N>_enable = 0 is an invalid setting.
- pwm is actually a percentage and is scaled on writing to device.

Signed-off-by: Luke D. Jones <luke@ljones.dev>
Link: https://lore.kernel.org/r/20211024033705.5595-2-luke@ljones.dev
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 include/linux/platform_data/x86/asus-wmi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
index 17dc5cb6f3f2..a571b47ff362 100644
--- a/include/linux/platform_data/x86/asus-wmi.h
+++ b/include/linux/platform_data/x86/asus-wmi.h
@@ -77,6 +77,8 @@
 #define ASUS_WMI_DEVID_THERMAL_CTRL	0x00110011
 #define ASUS_WMI_DEVID_FAN_CTRL		0x00110012 /* deprecated */
 #define ASUS_WMI_DEVID_CPU_FAN_CTRL	0x00110013
+#define ASUS_WMI_DEVID_CPU_FAN_CURVE	0x00110024
+#define ASUS_WMI_DEVID_GPU_FAN_CURVE	0x00110025
 
 /* Power */
 #define ASUS_WMI_DEVID_PROCESSOR_STATE	0x00120012
-- 
cgit v1.2.3


From 38543b72fbe52b7eec0dedd420d80a06c652d8e4 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Thu, 28 Oct 2021 02:22:41 +0200
Subject: platform/surface: aggregator: Make client device removal more generic

Currently, there are similar functions defined in the Aggregator
Registry and the controller core.

Make client device removal more generic and export it. We can then use
this function later on to remove client devices from device hubs as well
as the controller and avoid re-defining similar things.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20211028002243.1586083-2-luzmaximilian@gmail.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 include/linux/surface_aggregator/device.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h
index f636c5310321..cc257097eb05 100644
--- a/include/linux/surface_aggregator/device.h
+++ b/include/linux/surface_aggregator/device.h
@@ -319,6 +319,15 @@ void ssam_device_driver_unregister(struct ssam_device_driver *d);
 		      ssam_device_driver_unregister)
 
 
+/* -- Helpers for controller and hub devices. ------------------------------- */
+
+#ifdef CONFIG_SURFACE_AGGREGATOR_BUS
+void ssam_remove_clients(struct device *dev);
+#else /* CONFIG_SURFACE_AGGREGATOR_BUS */
+static inline void ssam_remove_clients(struct device *dev) {}
+#endif /* CONFIG_SURFACE_AGGREGATOR_BUS */
+
+
 /* -- Helpers for client-device requests. ----------------------------------- */
 
 /**
-- 
cgit v1.2.3


From b04cc0d912eb80d3c438b11d96ca847c3e77e8ab Mon Sep 17 00:00:00 2001
From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Date: Mon, 25 Oct 2021 21:56:31 +0100
Subject: memory: renesas-rpc-if: Add support for RZ/G2L

SPI Multi I/O Bus Controller on RZ/G2L SoC is almost identical to
the RPC-IF interface found on R-Car Gen3 SoC's.

This patch adds a new compatible string for the RZ/G2L family so
that the timing values on RZ/G2L can be adjusted.

Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Reviewed-by: Biju Das <biju.das.jz@bp.renesas.com>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Link: https://lore.kernel.org/r/20211025205631.21151-8-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
---
 include/memory/renesas-rpc-if.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/memory/renesas-rpc-if.h b/include/memory/renesas-rpc-if.h
index 77c694a19149..7c93f5177532 100644
--- a/include/memory/renesas-rpc-if.h
+++ b/include/memory/renesas-rpc-if.h
@@ -57,6 +57,11 @@ struct rpcif_op {
 	} data;
 };
 
+enum rpcif_type {
+	RPCIF_RCAR_GEN3,
+	RPCIF_RZ_G2L,
+};
+
 struct rpcif {
 	struct device *dev;
 	void __iomem *base;
@@ -64,6 +69,7 @@ struct rpcif {
 	struct regmap *regmap;
 	struct reset_control *rstc;
 	size_t size;
+	enum rpcif_type type;
 	enum rpcif_data_dir dir;
 	u8 bus_size;
 	void *buffer;
@@ -78,7 +84,7 @@ struct rpcif {
 };
 
 int rpcif_sw_init(struct rpcif *rpc, struct device *dev);
-void rpcif_hw_init(struct rpcif *rpc, bool hyperflash);
+int rpcif_hw_init(struct rpcif *rpc, bool hyperflash);
 void rpcif_prepare(struct rpcif *rpc, const struct rpcif_op *op, u64 *offs,
 		   size_t *len);
 int rpcif_manual_xfer(struct rpcif *rpc);
-- 
cgit v1.2.3


From ebf7f6f0a6cdcc17a3da52b81e4b3a98c4005028 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Fri, 5 Nov 2021 09:30:00 +0800
Subject: bpf: Change value of MAX_TAIL_CALL_CNT from 32 to 33
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the current code, the actual max tail call count is 33 which is greater
than MAX_TAIL_CALL_CNT (defined as 32). The actual limit is not consistent
with the meaning of MAX_TAIL_CALL_CNT and thus confusing at first glance.
We can see the historical evolution from commit 04fd61ab36ec ("bpf: allow
bpf programs to tail-call other bpf programs") and commit f9dabe016b63
("bpf: Undo off-by-one in interpreter tail call count limit"). In order
to avoid changing existing behavior, the actual limit is 33 now, this is
reasonable.

After commit 874be05f525e ("bpf, tests: Add tail call test suite"), we can
see there exists failed testcase.

On all archs when CONFIG_BPF_JIT_ALWAYS_ON is not set:
 # echo 0 > /proc/sys/net/core/bpf_jit_enable
 # modprobe test_bpf
 # dmesg | grep -w FAIL
 Tail call error path, max count reached jited:0 ret 34 != 33 FAIL

On some archs:
 # echo 1 > /proc/sys/net/core/bpf_jit_enable
 # modprobe test_bpf
 # dmesg | grep -w FAIL
 Tail call error path, max count reached jited:1 ret 34 != 33 FAIL

Although the above failed testcase has been fixed in commit 18935a72eb25
("bpf/tests: Fix error in tail call limit tests"), it would still be good
to change the value of MAX_TAIL_CALL_CNT from 32 to 33 to make the code
more readable.

The 32-bit x86 JIT was using a limit of 32, just fix the wrong comments and
limit to 33 tail calls as the constant MAX_TAIL_CALL_CNT updated. For the
mips64 JIT, use "ori" instead of "addiu" as suggested by Johan Almbladh.
For the riscv JIT, use RV_REG_TCC directly to save one register move as
suggested by Björn Töpel. For the other implementations, no function changes,
it does not change the current limit 33, the new value of MAX_TAIL_CALL_CNT
can reflect the actual max tail call count, the related tail call testcases
in test_bpf module and selftests can work well for the interpreter and the
JIT.

Here are the test results on x86_64:

 # uname -m
 x86_64
 # echo 0 > /proc/sys/net/core/bpf_jit_enable
 # modprobe test_bpf test_suite=test_tail_calls
 # dmesg | tail -1
 test_bpf: test_tail_calls: Summary: 8 PASSED, 0 FAILED, [0/8 JIT'ed]
 # rmmod test_bpf
 # echo 1 > /proc/sys/net/core/bpf_jit_enable
 # modprobe test_bpf test_suite=test_tail_calls
 # dmesg | tail -1
 test_bpf: test_tail_calls: Summary: 8 PASSED, 0 FAILED, [8/8 JIT'ed]
 # rmmod test_bpf
 # ./test_progs -t tailcalls
 #142 tailcalls:OK
 Summary: 1/11 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Acked-by: Björn Töpel <bjorn@kernel.org>
Acked-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/bpf/1636075800-3264-1-git-send-email-yangtiezhu@loongson.cn
---
 include/linux/bpf.h      | 2 +-
 include/uapi/linux/bpf.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 56098c866704..cc7a0c36e7df 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1081,7 +1081,7 @@ struct bpf_array {
 };
 
 #define BPF_COMPLEXITY_LIMIT_INSNS      1000000 /* yes. 1M insns */
-#define MAX_TAIL_CALL_CNT 32
+#define MAX_TAIL_CALL_CNT 33
 
 #define BPF_F_ACCESS_MASK	(BPF_F_RDONLY |		\
 				 BPF_F_RDONLY_PROG |	\
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6297eafdc40f..a69e4b04ffeb 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1744,7 +1744,7 @@ union bpf_attr {
  * 		if the maximum number of tail calls has been reached for this
  * 		chain of programs. This limit is defined in the kernel by the
  * 		macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
- * 		which is currently set to 32.
+ *		which is currently set to 33.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
-- 
cgit v1.2.3


From 283c6b54bca13313a4f437719f600a3ad2135847 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 11:02:31 -0800
Subject: tcp: remove dead code in __tcp_v6_send_check()

For some reason, I forgot to change __tcp_v6_send_check() at
the same time I removed (ip_summed == CHECKSUM_PARTIAL) check
in __tcp_v4_send_check()

Fixes: 98be9b12096f ("tcp: remove dead code after CHECKSUM_PARTIAL adoption")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_checksum.h | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h
index b3f4eaa88672..ea681910b7a3 100644
--- a/include/net/ip6_checksum.h
+++ b/include/net/ip6_checksum.h
@@ -65,15 +65,9 @@ static inline void __tcp_v6_send_check(struct sk_buff *skb,
 {
 	struct tcphdr *th = tcp_hdr(skb);
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
-		skb->csum_start = skb_transport_header(skb) - skb->head;
-		skb->csum_offset = offsetof(struct tcphdr, check);
-	} else {
-		th->check = tcp_v6_check(skb->len, saddr, daddr,
-					 csum_partial(th, th->doff << 2,
-						      skb->csum));
-	}
+	th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
+	skb->csum_start = skb_transport_header(skb) - skb->head;
+	skb->csum_offset = offsetof(struct tcphdr, check);
 }
 
 static inline void tcp_v6_gso_csum_prep(struct sk_buff *skb)
-- 
cgit v1.2.3


From 42f67eea3ba36cef2dce2e853de6ddcb2e89eb39 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 11:02:33 -0800
Subject: net: use sk_is_tcp() in more places

Move sk_is_tcp() to include/net/sock.h and use it where we can.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skmsg.h | 6 ------
 include/net/sock.h    | 5 +++++
 2 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 584d94be9c8b..18a717fe62eb 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -507,12 +507,6 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
 	return !!psock->saved_data_ready;
 }
 
-static inline bool sk_is_tcp(const struct sock *sk)
-{
-	return sk->sk_type == SOCK_STREAM &&
-	       sk->sk_protocol == IPPROTO_TCP;
-}
-
 static inline bool sk_is_udp(const struct sock *sk)
 {
 	return sk->sk_type == SOCK_DGRAM &&
diff --git a/include/net/sock.h b/include/net/sock.h
index b32906e1ab55..5bdeffdea5ec 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2638,6 +2638,11 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
 			   &skb_shinfo(skb)->tskey);
 }
 
+static inline bool sk_is_tcp(const struct sock *sk)
+{
+	return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP;
+}
+
 /**
  * sk_eat_skb - Release a skb if it is no longer needed
  * @sk: socket to eat this skb from
-- 
cgit v1.2.3


From d0d598ca86bd9e595f16a39097707c90841afe80 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 11:02:34 -0800
Subject: net: remove sk_route_forced_caps

We were only using one bit, and we can replace it by sk_is_tcp()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 5bdeffdea5ec..ebad629dd9ed 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -285,8 +285,6 @@ struct bpf_local_storage;
   *	@sk_no_check_rx: allow zero checksum in RX packets
   *	@sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
   *	@sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
-  *	@sk_route_forced_caps: static, forced route capabilities
-  *		(set in tcp_init_sock())
   *	@sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
   *	@sk_gso_max_size: Maximum GSO segment size to build
   *	@sk_gso_max_segs: Maximum number of GSO segments
@@ -461,7 +459,6 @@ struct sock {
 	struct page_frag	sk_frag;
 	netdev_features_t	sk_route_caps;
 	netdev_features_t	sk_route_nocaps;
-	netdev_features_t	sk_route_forced_caps;
 	int			sk_gso_type;
 	unsigned int		sk_gso_max_size;
 	gfp_t			sk_allocation;
-- 
cgit v1.2.3


From aba546565b613e74b84b8261999ea82b5561d3f1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 11:02:35 -0800
Subject: net: remove sk_route_nocaps

Instead of using a full netdev_features_t, we can use a single bit,
as sk_route_nocaps is only used to remove NETIF_F_GSO_MASK from
sk->sk_route_cap.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index ebad629dd9ed..985ddcd33504 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -284,7 +284,7 @@ struct bpf_local_storage;
   *	@sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets
   *	@sk_no_check_rx: allow zero checksum in RX packets
   *	@sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
-  *	@sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
+  *	@sk_gso_disabled: if set, NETIF_F_GSO_MASK is forbidden.
   *	@sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
   *	@sk_gso_max_size: Maximum GSO segment size to build
   *	@sk_gso_max_segs: Maximum number of GSO segments
@@ -458,7 +458,6 @@ struct sock {
 	unsigned long		sk_max_pacing_rate;
 	struct page_frag	sk_frag;
 	netdev_features_t	sk_route_caps;
-	netdev_features_t	sk_route_nocaps;
 	int			sk_gso_type;
 	unsigned int		sk_gso_max_size;
 	gfp_t			sk_allocation;
@@ -468,7 +467,7 @@ struct sock {
 	 * Because of non atomicity rules, all
 	 * changes are protected by socket lock.
 	 */
-	u8			sk_padding : 1,
+	u8			sk_gso_disabled : 1,
 				sk_kern_sock : 1,
 				sk_no_check_tx : 1,
 				sk_no_check_rx : 1,
@@ -2121,10 +2120,10 @@ static inline bool sk_can_gso(const struct sock *sk)
 
 void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
 
-static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
+static inline void sk_gso_disable(struct sock *sk)
 {
-	sk->sk_route_nocaps |= flags;
-	sk->sk_route_caps &= ~flags;
+	sk->sk_gso_disabled = 1;
+	sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
 }
 
 static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
-- 
cgit v1.2.3


From 1b31debca83284486cd736757b5f26d51719ef80 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 11:02:36 -0800
Subject: ipv6: shrink struct ipcm6_cookie

gso_size can be moved after tclass, to use an existing hole.
(8 bytes saved on 64bit arches)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index c19bf51ded1d..53ac7707ca70 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -345,9 +345,9 @@ struct ipcm6_cookie {
 	struct sockcm_cookie sockc;
 	__s16 hlimit;
 	__s16 tclass;
+	__u16 gso_size;
 	__s8  dontfrag;
 	struct ipv6_txoptions *opt;
-	__u16 gso_size;
 };
 
 static inline void ipcm6_init(struct ipcm6_cookie *ipc6)
-- 
cgit v1.2.3


From 1ace2b4d2b4e1db8fc62d872ab54ab48f6215ecd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 11:02:37 -0800
Subject: net: shrink struct sock by 8 bytes

Move sk_bind_phc next to sk_peer_lock to fill a hole.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 985ddcd33504..2333ab081789 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -489,6 +489,7 @@ struct sock {
 	u16			sk_busy_poll_budget;
 #endif
 	spinlock_t		sk_peer_lock;
+	int			sk_bind_phc;
 	struct pid		*sk_peer_pid;
 	const struct cred	*sk_peer_cred;
 
@@ -498,7 +499,6 @@ struct sock {
 	seqlock_t		sk_stamp_seq;
 #endif
 	u16			sk_tsflags;
-	int			sk_bind_phc;
 	u8			sk_shutdown;
 	u32			sk_tskey;
 	atomic_t		sk_zckey;
-- 
cgit v1.2.3


From 6c302e799a0d4be1362f505453b714fe05d91f2a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 11:02:38 -0800
Subject: net: forward_alloc_get depends on CONFIG_MPTCP

(struct proto)->sk_forward_alloc is currently only used by MPTCP.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 2333ab081789..cb97c448472a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1206,7 +1206,9 @@ struct proto {
 	unsigned int		inuse_idx;
 #endif
 
+#if IS_ENABLED(CONFIG_MPTCP)
 	int			(*forward_alloc_get)(const struct sock *sk);
+#endif
 
 	bool			(*stream_memory_free)(const struct sock *sk, int wake);
 	bool			(*sock_is_readable)(struct sock *sk);
@@ -1295,10 +1297,11 @@ INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int
 
 static inline int sk_forward_alloc_get(const struct sock *sk)
 {
-	if (!sk->sk_prot->forward_alloc_get)
-		return sk->sk_forward_alloc;
-
-	return sk->sk_prot->forward_alloc_get(sk);
+#if IS_ENABLED(CONFIG_MPTCP)
+	if (sk->sk_prot->forward_alloc_get)
+		return sk->sk_prot->forward_alloc_get(sk);
+#endif
+	return sk->sk_forward_alloc;
 }
 
 static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
-- 
cgit v1.2.3


From d2489c7b6d7d5ed4b32b56703c57c47bfbfe7fa5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 11:02:41 -0800
Subject: tcp: add RETPOLINE mitigation to sk_backlog_rcv

Use INDIRECT_CALL_INET() to avoid an indirect call
when/if CONFIG_RETPOLINE=y

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index cb97c448472a..2d40fe4c7718 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1018,12 +1018,18 @@ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *s
 
 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
 
+INDIRECT_CALLABLE_DECLARE(int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb));
+INDIRECT_CALLABLE_DECLARE(int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb));
+
 static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 {
 	if (sk_memalloc_socks() && skb_pfmemalloc(skb))
 		return __sk_backlog_rcv(sk, skb);
 
-	return sk->sk_backlog_rcv(sk, skb);
+	return INDIRECT_CALL_INET(sk->sk_backlog_rcv,
+				  tcp_v6_do_rcv,
+				  tcp_v4_do_rcv,
+				  sk, skb);
 }
 
 static inline void sk_incoming_cpu_update(struct sock *sk)
-- 
cgit v1.2.3


From 0307a0b74b3af6ecb1c8b7f727376130b15bbf44 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 11:02:42 -0800
Subject: tcp: annotate data-races on tp->segs_in and tp->data_segs_in

tcp_segs_in() can be called from BH, while socket spinlock
is held but socket owned by user, eventually reading these
fields from tcp_get_info()

Found by code inspection, no need to backport this patch
to older kernels.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4da22b41bde6..05c81677aaf7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2172,9 +2172,13 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb)
 	u16 segs_in;
 
 	segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
-	tp->segs_in += segs_in;
+
+	/* We update these fields while other threads might
+	 * read them from tcp_get_info()
+	 */
+	WRITE_ONCE(tp->segs_in, tp->segs_in + segs_in);
 	if (skb->len > tcp_hdrlen(skb))
-		tp->data_segs_in += segs_in;
+		WRITE_ONCE(tp->data_segs_in, tp->data_segs_in + segs_in);
 }
 
 /*
-- 
cgit v1.2.3


From f35f821935d8df76f9c92e2431a225bdff938169 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 11:02:46 -0800
Subject: tcp: defer skb freeing after socket lock is released

tcp recvmsg() (or rx zerocopy) spends a fair amount of time
freeing skbs after their payload has been consumed.

A typical ~64KB GRO packet has to release ~45 page
references, eventually going to page allocator
for each of them.

Currently, this freeing is performed while socket lock
is held, meaning that there is a high chance that
BH handler has to queue incoming packets to tcp socket backlog.

This can cause additional latencies, because the user
thread has to process the backlog at release_sock() time,
and while doing so, additional frames can be added
by BH handler.

This patch adds logic to defer these frees after socket
lock is released, or directly from BH handler if possible.

Being able to free these skbs from BH handler helps a lot,
because this avoids the usual alloc/free assymetry,
when BH handler and user thread do not run on same cpu or
NUMA node.

One cpu can now be fully utilized for the kernel->user copy,
and another cpu is handling BH processing and skb/page
allocs/frees (assuming RFS is not forcing use of a single CPU)

Tested:
 100Gbit NIC
 Max throughput for one TCP_STREAM flow, over 10 runs

MTU : 1500
Before: 55 Gbit
After:  66 Gbit

MTU : 4096+(headers)
Before: 82 Gbit
After:  95 Gbit

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 ++
 include/net/sock.h     |  3 +++
 include/net/tcp.h      | 10 ++++++++++
 3 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 686a666d073d..b8b806512e16 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -36,6 +36,7 @@
 #include <linux/splice.h>
 #include <linux/in6.h>
 #include <linux/if_packet.h>
+#include <linux/llist.h>
 #include <net/flow.h>
 #include <net/page_pool.h>
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
@@ -743,6 +744,7 @@ struct sk_buff {
 		};
 		struct rb_node		rbnode; /* used in netem, ip4 defrag, and tcp stack */
 		struct list_head	list;
+		struct llist_node	ll_node;
 	};
 
 	union {
diff --git a/include/net/sock.h b/include/net/sock.h
index 2d40fe4c7718..2578d1f455a7 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -63,6 +63,7 @@
 #include <linux/indirect_call_wrapper.h>
 #include <linux/atomic.h>
 #include <linux/refcount.h>
+#include <linux/llist.h>
 #include <net/dst.h>
 #include <net/checksum.h>
 #include <net/tcp_states.h>
@@ -408,6 +409,8 @@ struct sock {
 		struct sk_buff	*head;
 		struct sk_buff	*tail;
 	} sk_backlog;
+	struct llist_head defer_list;
+
 #define sk_rmem_alloc sk_backlog.rmem_alloc
 
 	int			sk_forward_alloc;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 05c81677aaf7..44e442bf23f9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1368,6 +1368,16 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb)
 }
 
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
+
+void __sk_defer_free_flush(struct sock *sk);
+
+static inline void sk_defer_free_flush(struct sock *sk)
+{
+	if (llist_empty(&sk->defer_list))
+		return;
+	__sk_defer_free_flush(sk);
+}
+
 int tcp_filter(struct sock *sk, struct sk_buff *skb);
 void tcp_set_state(struct sock *sk, int state);
 void tcp_done(struct sock *sk);
-- 
cgit v1.2.3


From 43f51df4172955971ef5498f09308a9dc0291766 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 11:02:49 -0800
Subject: net: move early demux fields close to sk_refcnt

sk_rx_dst/sk_rx_dst_ifindex/sk_rx_dst_cookie are read in early demux,
and currently spans two cache lines.

Moving them close to sk_refcnt makes more sense, as only one cache
line is needed.

New layout for this hot cache line is :

struct sock {
	struct sock_common         __sk_common;          /*     0  0x88 */
	/* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
	struct dst_entry *         sk_rx_dst;            /*  0x88   0x8 */
	int                        sk_rx_dst_ifindex;    /*  0x90   0x4 */
	u32                        sk_rx_dst_cookie;     /*  0x94   0x4 */
	socket_lock_t              sk_lock;              /*  0x98  0x20 */
	atomic_t                   sk_drops;             /*  0xb8   0x4 */
	int                        sk_rcvlowat;          /*  0xbc   0x4 */
	/* --- cacheline 3 boundary (192 bytes) --- */

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 2578d1f455a7..95cc03bd3fac 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -390,6 +390,11 @@ struct sock {
 #define sk_flags		__sk_common.skc_flags
 #define sk_rxhash		__sk_common.skc_rxhash
 
+	/* early demux fields */
+	struct dst_entry	*sk_rx_dst;
+	int			sk_rx_dst_ifindex;
+	u32			sk_rx_dst_cookie;
+
 	socket_lock_t		sk_lock;
 	atomic_t		sk_drops;
 	int			sk_rcvlowat;
@@ -432,9 +437,6 @@ struct sock {
 #ifdef CONFIG_XFRM
 	struct xfrm_policy __rcu *sk_policy[2];
 #endif
-	struct dst_entry	*sk_rx_dst;
-	int			sk_rx_dst_ifindex;
-	u32			sk_rx_dst_cookie;
 
 	struct dst_entry __rcu	*sk_dst_cache;
 	atomic_t		sk_omem_alloc;
-- 
cgit v1.2.3


From 4721031c3559db8eae61df305f10c00099a7c1d0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 09:05:51 -0800
Subject: net: move gro definitions to include/net/gro.h

include/linux/netdevice.h became too big, move gro stuff
into include/net/gro.h

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h  | 348 ---------------------------------------
 include/net/gro.h          | 396 ++++++++++++++++++++++++++++++++++++++++++++-
 include/net/ip.h           |   8 -
 include/net/ip6_checksum.h |   8 -
 include/net/udp.h          |  24 ---
 5 files changed, 394 insertions(+), 390 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3ec42495a43a..d95c9839ce90 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2520,109 +2520,6 @@ static inline void netif_napi_del(struct napi_struct *napi)
 	synchronize_net();
 }
 
-struct napi_gro_cb {
-	/* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
-	void	*frag0;
-
-	/* Length of frag0. */
-	unsigned int frag0_len;
-
-	/* This indicates where we are processing relative to skb->data. */
-	int	data_offset;
-
-	/* This is non-zero if the packet cannot be merged with the new skb. */
-	u16	flush;
-
-	/* Save the IP ID here and check when we get to the transport layer */
-	u16	flush_id;
-
-	/* Number of segments aggregated. */
-	u16	count;
-
-	/* Start offset for remote checksum offload */
-	u16	gro_remcsum_start;
-
-	/* jiffies when first packet was created/queued */
-	unsigned long age;
-
-	/* Used in ipv6_gro_receive() and foo-over-udp */
-	u16	proto;
-
-	/* This is non-zero if the packet may be of the same flow. */
-	u8	same_flow:1;
-
-	/* Used in tunnel GRO receive */
-	u8	encap_mark:1;
-
-	/* GRO checksum is valid */
-	u8	csum_valid:1;
-
-	/* Number of checksums via CHECKSUM_UNNECESSARY */
-	u8	csum_cnt:3;
-
-	/* Free the skb? */
-	u8	free:2;
-#define NAPI_GRO_FREE		  1
-#define NAPI_GRO_FREE_STOLEN_HEAD 2
-
-	/* Used in foo-over-udp, set in udp[46]_gro_receive */
-	u8	is_ipv6:1;
-
-	/* Used in GRE, set in fou/gue_gro_receive */
-	u8	is_fou:1;
-
-	/* Used to determine if flush_id can be ignored */
-	u8	is_atomic:1;
-
-	/* Number of gro_receive callbacks this packet already went through */
-	u8 recursion_counter:4;
-
-	/* GRO is done by frag_list pointer chaining. */
-	u8	is_flist:1;
-
-	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
-	__wsum	csum;
-
-	/* used in skb_gro_receive() slow path */
-	struct sk_buff *last;
-};
-
-#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
-
-#define GRO_RECURSION_LIMIT 15
-static inline int gro_recursion_inc_test(struct sk_buff *skb)
-{
-	return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT;
-}
-
-typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *);
-static inline struct sk_buff *call_gro_receive(gro_receive_t cb,
-					       struct list_head *head,
-					       struct sk_buff *skb)
-{
-	if (unlikely(gro_recursion_inc_test(skb))) {
-		NAPI_GRO_CB(skb)->flush |= 1;
-		return NULL;
-	}
-
-	return cb(head, skb);
-}
-
-typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *,
-					    struct sk_buff *);
-static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb,
-						  struct sock *sk,
-						  struct list_head *head,
-						  struct sk_buff *skb)
-{
-	if (unlikely(gro_recursion_inc_test(skb))) {
-		NAPI_GRO_CB(skb)->flush |= 1;
-		return NULL;
-	}
-
-	return cb(sk, head, skb);
-}
-
 struct packet_type {
 	__be16			type;	/* This is really htons(ether_type). */
 	bool			ignore_outgoing;
@@ -3008,251 +2905,6 @@ int dev_restart(struct net_device *dev);
 int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
 int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb);
 
-static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
-{
-	return NAPI_GRO_CB(skb)->data_offset;
-}
-
-static inline unsigned int skb_gro_len(const struct sk_buff *skb)
-{
-	return skb->len - NAPI_GRO_CB(skb)->data_offset;
-}
-
-static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
-{
-	NAPI_GRO_CB(skb)->data_offset += len;
-}
-
-static inline void *skb_gro_header_fast(struct sk_buff *skb,
-					unsigned int offset)
-{
-	return NAPI_GRO_CB(skb)->frag0 + offset;
-}
-
-static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen)
-{
-	return NAPI_GRO_CB(skb)->frag0_len < hlen;
-}
-
-static inline void skb_gro_frag0_invalidate(struct sk_buff *skb)
-{
-	NAPI_GRO_CB(skb)->frag0 = NULL;
-	NAPI_GRO_CB(skb)->frag0_len = 0;
-}
-
-static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
-					unsigned int offset)
-{
-	if (!pskb_may_pull(skb, hlen))
-		return NULL;
-
-	skb_gro_frag0_invalidate(skb);
-	return skb->data + offset;
-}
-
-static inline void *skb_gro_network_header(struct sk_buff *skb)
-{
-	return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) +
-	       skb_network_offset(skb);
-}
-
-static inline void skb_gro_postpull_rcsum(struct sk_buff *skb,
-					const void *start, unsigned int len)
-{
-	if (NAPI_GRO_CB(skb)->csum_valid)
-		NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum,
-						  csum_partial(start, len, 0));
-}
-
-/* GRO checksum functions. These are logical equivalents of the normal
- * checksum functions (in skbuff.h) except that they operate on the GRO
- * offsets and fields in sk_buff.
- */
-
-__sum16 __skb_gro_checksum_complete(struct sk_buff *skb);
-
-static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb)
-{
-	return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb));
-}
-
-static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
-						      bool zero_okay,
-						      __sum16 check)
-{
-	return ((skb->ip_summed != CHECKSUM_PARTIAL ||
-		skb_checksum_start_offset(skb) <
-		 skb_gro_offset(skb)) &&
-		!skb_at_gro_remcsum_start(skb) &&
-		NAPI_GRO_CB(skb)->csum_cnt == 0 &&
-		(!zero_okay || check));
-}
-
-static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb,
-							   __wsum psum)
-{
-	if (NAPI_GRO_CB(skb)->csum_valid &&
-	    !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum)))
-		return 0;
-
-	NAPI_GRO_CB(skb)->csum = psum;
-
-	return __skb_gro_checksum_complete(skb);
-}
-
-static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
-{
-	if (NAPI_GRO_CB(skb)->csum_cnt > 0) {
-		/* Consume a checksum from CHECKSUM_UNNECESSARY */
-		NAPI_GRO_CB(skb)->csum_cnt--;
-	} else {
-		/* Update skb for CHECKSUM_UNNECESSARY and csum_level when we
-		 * verified a new top level checksum or an encapsulated one
-		 * during GRO. This saves work if we fallback to normal path.
-		 */
-		__skb_incr_checksum_unnecessary(skb);
-	}
-}
-
-#define __skb_gro_checksum_validate(skb, proto, zero_okay, check,	\
-				    compute_pseudo)			\
-({									\
-	__sum16 __ret = 0;						\
-	if (__skb_gro_checksum_validate_needed(skb, zero_okay, check))	\
-		__ret = __skb_gro_checksum_validate_complete(skb,	\
-				compute_pseudo(skb, proto));		\
-	if (!__ret)							\
-		skb_gro_incr_csum_unnecessary(skb);			\
-	__ret;								\
-})
-
-#define skb_gro_checksum_validate(skb, proto, compute_pseudo)		\
-	__skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo)
-
-#define skb_gro_checksum_validate_zero_check(skb, proto, check,		\
-					     compute_pseudo)		\
-	__skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo)
-
-#define skb_gro_checksum_simple_validate(skb)				\
-	__skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo)
-
-static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb)
-{
-	return (NAPI_GRO_CB(skb)->csum_cnt == 0 &&
-		!NAPI_GRO_CB(skb)->csum_valid);
-}
-
-static inline void __skb_gro_checksum_convert(struct sk_buff *skb,
-					      __wsum pseudo)
-{
-	NAPI_GRO_CB(skb)->csum = ~pseudo;
-	NAPI_GRO_CB(skb)->csum_valid = 1;
-}
-
-#define skb_gro_checksum_try_convert(skb, proto, compute_pseudo)	\
-do {									\
-	if (__skb_gro_checksum_convert_check(skb))			\
-		__skb_gro_checksum_convert(skb, 			\
-					   compute_pseudo(skb, proto));	\
-} while (0)
-
-struct gro_remcsum {
-	int offset;
-	__wsum delta;
-};
-
-static inline void skb_gro_remcsum_init(struct gro_remcsum *grc)
-{
-	grc->offset = 0;
-	grc->delta = 0;
-}
-
-static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
-					    unsigned int off, size_t hdrlen,
-					    int start, int offset,
-					    struct gro_remcsum *grc,
-					    bool nopartial)
-{
-	__wsum delta;
-	size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
-
-	BUG_ON(!NAPI_GRO_CB(skb)->csum_valid);
-
-	if (!nopartial) {
-		NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start;
-		return ptr;
-	}
-
-	ptr = skb_gro_header_fast(skb, off);
-	if (skb_gro_header_hard(skb, off + plen)) {
-		ptr = skb_gro_header_slow(skb, off + plen, off);
-		if (!ptr)
-			return NULL;
-	}
-
-	delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum,
-			       start, offset);
-
-	/* Adjust skb->csum since we changed the packet */
-	NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);
-
-	grc->offset = off + hdrlen + offset;
-	grc->delta = delta;
-
-	return ptr;
-}
-
-static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
-					   struct gro_remcsum *grc)
-{
-	void *ptr;
-	size_t plen = grc->offset + sizeof(u16);
-
-	if (!grc->delta)
-		return;
-
-	ptr = skb_gro_header_fast(skb, grc->offset);
-	if (skb_gro_header_hard(skb, grc->offset + sizeof(u16))) {
-		ptr = skb_gro_header_slow(skb, plen, grc->offset);
-		if (!ptr)
-			return;
-	}
-
-	remcsum_unadjust((__sum16 *)ptr, grc->delta);
-}
-
-#ifdef CONFIG_XFRM_OFFLOAD
-static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
-{
-	if (PTR_ERR(pp) != -EINPROGRESS)
-		NAPI_GRO_CB(skb)->flush |= flush;
-}
-static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
-					       struct sk_buff *pp,
-					       int flush,
-					       struct gro_remcsum *grc)
-{
-	if (PTR_ERR(pp) != -EINPROGRESS) {
-		NAPI_GRO_CB(skb)->flush |= flush;
-		skb_gro_remcsum_cleanup(skb, grc);
-		skb->remcsum_offload = 0;
-	}
-}
-#else
-static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
-{
-	NAPI_GRO_CB(skb)->flush |= flush;
-}
-static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
-					       struct sk_buff *pp,
-					       int flush,
-					       struct gro_remcsum *grc)
-{
-	NAPI_GRO_CB(skb)->flush |= flush;
-	skb_gro_remcsum_cleanup(skb, grc);
-	skb->remcsum_offload = 0;
-}
-#endif
 
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				  unsigned short type,
diff --git a/include/net/gro.h b/include/net/gro.h
index 01edaf3fdda0..1ffbe74b2e35 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -4,9 +4,367 @@
 #define _NET_IPV6_GRO_H
 
 #include <linux/indirect_call_wrapper.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/skbuff.h>
+#include <net/udp.h>
 
-struct list_head;
-struct sk_buff;
+struct napi_gro_cb {
+	/* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
+	void	*frag0;
+
+	/* Length of frag0. */
+	unsigned int frag0_len;
+
+	/* This indicates where we are processing relative to skb->data. */
+	int	data_offset;
+
+	/* This is non-zero if the packet cannot be merged with the new skb. */
+	u16	flush;
+
+	/* Save the IP ID here and check when we get to the transport layer */
+	u16	flush_id;
+
+	/* Number of segments aggregated. */
+	u16	count;
+
+	/* Start offset for remote checksum offload */
+	u16	gro_remcsum_start;
+
+	/* jiffies when first packet was created/queued */
+	unsigned long age;
+
+	/* Used in ipv6_gro_receive() and foo-over-udp */
+	u16	proto;
+
+	/* This is non-zero if the packet may be of the same flow. */
+	u8	same_flow:1;
+
+	/* Used in tunnel GRO receive */
+	u8	encap_mark:1;
+
+	/* GRO checksum is valid */
+	u8	csum_valid:1;
+
+	/* Number of checksums via CHECKSUM_UNNECESSARY */
+	u8	csum_cnt:3;
+
+	/* Free the skb? */
+	u8	free:2;
+#define NAPI_GRO_FREE		  1
+#define NAPI_GRO_FREE_STOLEN_HEAD 2
+
+	/* Used in foo-over-udp, set in udp[46]_gro_receive */
+	u8	is_ipv6:1;
+
+	/* Used in GRE, set in fou/gue_gro_receive */
+	u8	is_fou:1;
+
+	/* Used to determine if flush_id can be ignored */
+	u8	is_atomic:1;
+
+	/* Number of gro_receive callbacks this packet already went through */
+	u8 recursion_counter:4;
+
+	/* GRO is done by frag_list pointer chaining. */
+	u8	is_flist:1;
+
+	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
+	__wsum	csum;
+
+	/* used in skb_gro_receive() slow path */
+	struct sk_buff *last;
+};
+
+#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
+
+#define GRO_RECURSION_LIMIT 15
+static inline int gro_recursion_inc_test(struct sk_buff *skb)
+{
+	return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT;
+}
+
+typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *);
+static inline struct sk_buff *call_gro_receive(gro_receive_t cb,
+					       struct list_head *head,
+					       struct sk_buff *skb)
+{
+	if (unlikely(gro_recursion_inc_test(skb))) {
+		NAPI_GRO_CB(skb)->flush |= 1;
+		return NULL;
+	}
+
+	return cb(head, skb);
+}
+
+typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *,
+					    struct sk_buff *);
+static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb,
+						  struct sock *sk,
+						  struct list_head *head,
+						  struct sk_buff *skb)
+{
+	if (unlikely(gro_recursion_inc_test(skb))) {
+		NAPI_GRO_CB(skb)->flush |= 1;
+		return NULL;
+	}
+
+	return cb(sk, head, skb);
+}
+
+static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
+{
+	return NAPI_GRO_CB(skb)->data_offset;
+}
+
+static inline unsigned int skb_gro_len(const struct sk_buff *skb)
+{
+	return skb->len - NAPI_GRO_CB(skb)->data_offset;
+}
+
+static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
+{
+	NAPI_GRO_CB(skb)->data_offset += len;
+}
+
+static inline void *skb_gro_header_fast(struct sk_buff *skb,
+					unsigned int offset)
+{
+	return NAPI_GRO_CB(skb)->frag0 + offset;
+}
+
+static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen)
+{
+	return NAPI_GRO_CB(skb)->frag0_len < hlen;
+}
+
+static inline void skb_gro_frag0_invalidate(struct sk_buff *skb)
+{
+	NAPI_GRO_CB(skb)->frag0 = NULL;
+	NAPI_GRO_CB(skb)->frag0_len = 0;
+}
+
+static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
+					unsigned int offset)
+{
+	if (!pskb_may_pull(skb, hlen))
+		return NULL;
+
+	skb_gro_frag0_invalidate(skb);
+	return skb->data + offset;
+}
+
+static inline void *skb_gro_network_header(struct sk_buff *skb)
+{
+	return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) +
+	       skb_network_offset(skb);
+}
+
+static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto)
+{
+	const struct iphdr *iph = skb_gro_network_header(skb);
+
+	return csum_tcpudp_nofold(iph->saddr, iph->daddr,
+				  skb_gro_len(skb), proto, 0);
+}
+
+static inline void skb_gro_postpull_rcsum(struct sk_buff *skb,
+					const void *start, unsigned int len)
+{
+	if (NAPI_GRO_CB(skb)->csum_valid)
+		NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum,
+						  csum_partial(start, len, 0));
+}
+
+/* GRO checksum functions. These are logical equivalents of the normal
+ * checksum functions (in skbuff.h) except that they operate on the GRO
+ * offsets and fields in sk_buff.
+ */
+
+__sum16 __skb_gro_checksum_complete(struct sk_buff *skb);
+
+static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb)
+{
+	return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb));
+}
+
+static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
+						      bool zero_okay,
+						      __sum16 check)
+{
+	return ((skb->ip_summed != CHECKSUM_PARTIAL ||
+		skb_checksum_start_offset(skb) <
+		 skb_gro_offset(skb)) &&
+		!skb_at_gro_remcsum_start(skb) &&
+		NAPI_GRO_CB(skb)->csum_cnt == 0 &&
+		(!zero_okay || check));
+}
+
+static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb,
+							   __wsum psum)
+{
+	if (NAPI_GRO_CB(skb)->csum_valid &&
+	    !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum)))
+		return 0;
+
+	NAPI_GRO_CB(skb)->csum = psum;
+
+	return __skb_gro_checksum_complete(skb);
+}
+
+static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
+{
+	if (NAPI_GRO_CB(skb)->csum_cnt > 0) {
+		/* Consume a checksum from CHECKSUM_UNNECESSARY */
+		NAPI_GRO_CB(skb)->csum_cnt--;
+	} else {
+		/* Update skb for CHECKSUM_UNNECESSARY and csum_level when we
+		 * verified a new top level checksum or an encapsulated one
+		 * during GRO. This saves work if we fallback to normal path.
+		 */
+		__skb_incr_checksum_unnecessary(skb);
+	}
+}
+
+#define __skb_gro_checksum_validate(skb, proto, zero_okay, check,	\
+				    compute_pseudo)			\
+({									\
+	__sum16 __ret = 0;						\
+	if (__skb_gro_checksum_validate_needed(skb, zero_okay, check))	\
+		__ret = __skb_gro_checksum_validate_complete(skb,	\
+				compute_pseudo(skb, proto));		\
+	if (!__ret)							\
+		skb_gro_incr_csum_unnecessary(skb);			\
+	__ret;								\
+})
+
+#define skb_gro_checksum_validate(skb, proto, compute_pseudo)		\
+	__skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo)
+
+#define skb_gro_checksum_validate_zero_check(skb, proto, check,		\
+					     compute_pseudo)		\
+	__skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo)
+
+#define skb_gro_checksum_simple_validate(skb)				\
+	__skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo)
+
+static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb)
+{
+	return (NAPI_GRO_CB(skb)->csum_cnt == 0 &&
+		!NAPI_GRO_CB(skb)->csum_valid);
+}
+
+static inline void __skb_gro_checksum_convert(struct sk_buff *skb,
+					      __wsum pseudo)
+{
+	NAPI_GRO_CB(skb)->csum = ~pseudo;
+	NAPI_GRO_CB(skb)->csum_valid = 1;
+}
+
+#define skb_gro_checksum_try_convert(skb, proto, compute_pseudo)	\
+do {									\
+	if (__skb_gro_checksum_convert_check(skb))			\
+		__skb_gro_checksum_convert(skb, 			\
+					   compute_pseudo(skb, proto));	\
+} while (0)
+
+struct gro_remcsum {
+	int offset;
+	__wsum delta;
+};
+
+static inline void skb_gro_remcsum_init(struct gro_remcsum *grc)
+{
+	grc->offset = 0;
+	grc->delta = 0;
+}
+
+static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
+					    unsigned int off, size_t hdrlen,
+					    int start, int offset,
+					    struct gro_remcsum *grc,
+					    bool nopartial)
+{
+	__wsum delta;
+	size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
+
+	BUG_ON(!NAPI_GRO_CB(skb)->csum_valid);
+
+	if (!nopartial) {
+		NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start;
+		return ptr;
+	}
+
+	ptr = skb_gro_header_fast(skb, off);
+	if (skb_gro_header_hard(skb, off + plen)) {
+		ptr = skb_gro_header_slow(skb, off + plen, off);
+		if (!ptr)
+			return NULL;
+	}
+
+	delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum,
+			       start, offset);
+
+	/* Adjust skb->csum since we changed the packet */
+	NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);
+
+	grc->offset = off + hdrlen + offset;
+	grc->delta = delta;
+
+	return ptr;
+}
+
+static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
+					   struct gro_remcsum *grc)
+{
+	void *ptr;
+	size_t plen = grc->offset + sizeof(u16);
+
+	if (!grc->delta)
+		return;
+
+	ptr = skb_gro_header_fast(skb, grc->offset);
+	if (skb_gro_header_hard(skb, grc->offset + sizeof(u16))) {
+		ptr = skb_gro_header_slow(skb, plen, grc->offset);
+		if (!ptr)
+			return;
+	}
+
+	remcsum_unadjust((__sum16 *)ptr, grc->delta);
+}
+
+#ifdef CONFIG_XFRM_OFFLOAD
+static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
+{
+	if (PTR_ERR(pp) != -EINPROGRESS)
+		NAPI_GRO_CB(skb)->flush |= flush;
+}
+static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
+					       struct sk_buff *pp,
+					       int flush,
+					       struct gro_remcsum *grc)
+{
+	if (PTR_ERR(pp) != -EINPROGRESS) {
+		NAPI_GRO_CB(skb)->flush |= flush;
+		skb_gro_remcsum_cleanup(skb, grc);
+		skb->remcsum_offload = 0;
+	}
+}
+#else
+static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
+{
+	NAPI_GRO_CB(skb)->flush |= flush;
+}
+static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
+					       struct sk_buff *pp,
+					       int flush,
+					       struct gro_remcsum *grc)
+{
+	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_remcsum_cleanup(skb, grc);
+	skb->remcsum_offload = 0;
+}
+#endif
 
 INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
 							   struct sk_buff *));
@@ -15,6 +373,14 @@ INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
 							   struct sk_buff *));
 INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
 
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
+							   struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));
+
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
+							   struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
+
 #define indirect_call_gro_receive_inet(cb, f2, f1, head, skb)	\
 ({								\
 	unlikely(gro_recursion_inc_test(skb)) ?			\
@@ -22,4 +388,30 @@ INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
 		INDIRECT_CALL_INET(cb, f2, f1, head, skb);	\
 })
 
+struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
+				struct udphdr *uh, struct sock *sk);
+int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
+
+static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
+{
+	struct udphdr *uh;
+	unsigned int hlen, off;
+
+	off  = skb_gro_offset(skb);
+	hlen = off + sizeof(*uh);
+	uh   = skb_gro_header_fast(skb, off);
+	if (skb_gro_header_hard(skb, hlen))
+		uh = skb_gro_header_slow(skb, hlen, off);
+
+	return uh;
+}
+
+static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto)
+{
+	const struct ipv6hdr *iph = skb_gro_network_header(skb);
+
+	return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
+					    skb_gro_len(skb), proto, 0));
+}
+
 #endif /* _NET_IPV6_GRO_H */
diff --git a/include/net/ip.h b/include/net/ip.h
index b71e88507c4a..7d1088888c10 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -568,14 +568,6 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow,
 	flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
 }
 
-static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto)
-{
-	const struct iphdr *iph = skb_gro_network_header(skb);
-
-	return csum_tcpudp_nofold(iph->saddr, iph->daddr,
-				  skb_gro_len(skb), proto, 0);
-}
-
 /*
  *	Map a multicast IP onto multicast MAC for type ethernet.
  */
diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h
index ea681910b7a3..c8a96b888277 100644
--- a/include/net/ip6_checksum.h
+++ b/include/net/ip6_checksum.h
@@ -43,14 +43,6 @@ static inline __wsum ip6_compute_pseudo(struct sk_buff *skb, int proto)
 					    skb->len, proto, 0));
 }
 
-static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto)
-{
-	const struct ipv6hdr *iph = skb_gro_network_header(skb);
-
-	return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
-					    skb_gro_len(skb), proto, 0));
-}
-
 static __inline__ __sum16 tcp_v6_check(int len,
 				   const struct in6_addr *saddr,
 				   const struct in6_addr *daddr,
diff --git a/include/net/udp.h b/include/net/udp.h
index 909ecf447e0f..f1c2a88c9005 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -167,36 +167,12 @@ static inline void udp_csum_pull_header(struct sk_buff *skb)
 typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport,
 				     __be16 dport);
 
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
-							   struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
-							   struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
 INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *));
 INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
 
-struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
-				struct udphdr *uh, struct sock *sk);
-int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
-
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
 				  netdev_features_t features, bool is_ipv6);
 
-static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
-{
-	struct udphdr *uh;
-	unsigned int hlen, off;
-
-	off  = skb_gro_offset(skb);
-	hlen = off + sizeof(*uh);
-	uh   = skb_gro_header_fast(skb, off);
-	if (skb_gro_header_hard(skb, hlen))
-		uh = skb_gro_header_slow(skb, hlen, off);
-
-	return uh;
-}
-
 /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */
 static inline int udp_lib_hash(struct sock *sk)
 {
-- 
cgit v1.2.3


From 0b935d7f8c07bf0a192712bdbf76dbf45ef8b115 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 09:05:52 -0800
Subject: net: gro: move skb_gro_receive_list to udp_offload.c

This helper is used once, no need to keep it in fat net/core/skbuff.c

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d95c9839ce90..ce6ee1453dbc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2903,7 +2903,6 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id);
 int netdev_get_name(struct net *net, char *name, int ifindex);
 int dev_restart(struct net_device *dev);
 int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
-int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb);
 
 
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
-- 
cgit v1.2.3


From e456a18a390b96f22b0de2acd4d0f49c72ed2280 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 09:05:53 -0800
Subject: net: gro: move skb_gro_receive into net/core/gro.c

net/core/gro.c will contain all core gro functions,
to shrink net/core/skbuff.c and net/core/dev.c

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 -
 include/net/gro.h         | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ce6ee1453dbc..93d397db9ec4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2902,7 +2902,6 @@ struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
 struct net_device *dev_get_by_napi_id(unsigned int napi_id);
 int netdev_get_name(struct net *net, char *name, int ifindex);
 int dev_restart(struct net_device *dev);
-int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
 
 
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
diff --git a/include/net/gro.h b/include/net/gro.h
index 1ffbe74b2e35..f988bf3440f8 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -414,4 +414,6 @@ static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto)
 					    skb_gro_len(skb), proto, 0));
 }
 
+int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
+
 #endif /* _NET_IPV6_GRO_H */
-- 
cgit v1.2.3


From 587652bbdd06ab38a4c1b85e40f933d2cf4a1147 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 09:05:54 -0800
Subject: net: gro: populate net/core/gro.c

Move gro code and data from net/core/dev.c to net/core/gro.c
to ease maintenance.

gro_normal_list() and gro_normal_one() are inlined
because they are called from both files.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 include/net/gro.h         | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 93d397db9ec4..31a7e6b27681 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3657,6 +3657,7 @@ int netif_rx_ni(struct sk_buff *skb);
 int netif_rx_any_context(struct sk_buff *skb);
 int netif_receive_skb(struct sk_buff *skb);
 int netif_receive_skb_core(struct sk_buff *skb);
+void netif_receive_skb_list_internal(struct list_head *head);
 void netif_receive_skb_list(struct list_head *head);
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
 void napi_gro_flush(struct napi_struct *napi, bool flush_old);
diff --git a/include/net/gro.h b/include/net/gro.h
index f988bf3440f8..d0e7df691a80 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -416,4 +416,26 @@ static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto)
 
 int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
 
+/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
+static inline void gro_normal_list(struct napi_struct *napi)
+{
+	if (!napi->rx_count)
+		return;
+	netif_receive_skb_list_internal(&napi->rx_list);
+	INIT_LIST_HEAD(&napi->rx_list);
+	napi->rx_count = 0;
+}
+
+/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
+ * pass the whole batch up to the stack.
+ */
+static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs)
+{
+	list_add_tail(&skb->list, &napi->rx_list);
+	napi->rx_count += segs;
+	if (napi->rx_count >= gro_normal_batch)
+		gro_normal_list(napi);
+}
+
+
 #endif /* _NET_IPV6_GRO_H */
-- 
cgit v1.2.3


From 2a12ae5d433df3d3c3f1a930799ec09cb2b8058f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 09:11:47 -0800
Subject: net: inline sock_prot_inuse_add()

sock_prot_inuse_add() is very small, we can inline it.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 95cc03bd3fac..5a1e1df3cefd 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1429,13 +1429,21 @@ proto_memory_pressure(struct proto *prot)
 
 
 #ifdef CONFIG_PROC_FS
+#define PROTO_INUSE_NR	64	/* should be enough for the first time */
+struct prot_inuse {
+	int val[PROTO_INUSE_NR];
+};
 /* Called with local bh disabled */
-void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc);
+static inline void sock_prot_inuse_add(const struct net *net,
+				       const struct proto *prot, int val)
+{
+	__this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
+}
 int sock_prot_inuse_get(struct net *net, struct proto *proto);
 int sock_inuse_get(struct net *net);
 #else
-static inline void sock_prot_inuse_add(struct net *net, struct proto *prot,
-		int inc)
+static inline void sock_prot_inuse_add(const struct net *net,
+				       const struct proto *prot, int val)
 {
 }
 #endif
-- 
cgit v1.2.3


From d477eb9004845cb2dc92ad5eed79a437738a868a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 09:11:48 -0800
Subject: net: make sock_inuse_add() available

MPTCP hard codes it, let us instead provide this helper.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 5a1e1df3cefd..c4c981a51797 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1439,6 +1439,12 @@ static inline void sock_prot_inuse_add(const struct net *net,
 {
 	__this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
 }
+
+static inline void sock_inuse_add(const struct net *net, int val)
+{
+	this_cpu_add(*net->core.sock_inuse, val);
+}
+
 int sock_prot_inuse_get(struct net *net, struct proto *proto);
 int sock_inuse_get(struct net *net);
 #else
@@ -1446,6 +1452,10 @@ static inline void sock_prot_inuse_add(const struct net *net,
 				       const struct proto *prot, int val)
 {
 }
+
+static inline void sock_inuse_add(const struct net *net, int val)
+{
+}
 #endif
 
 
-- 
cgit v1.2.3


From 4199bae10c49e24bc2c5d8c06a68820d56640000 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 09:11:49 -0800
Subject: net: merge net->core.prot_inuse and net->core.sock_inuse

net->core.sock_inuse is a per cpu variable (int),
while net->core.prot_inuse is another per cpu variable
of 64 integers.

per cpu allocator tend to place them in very different places.

Grouping them together makes sense, since it makes
updates potentially faster, if hitting the same
cache line.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/core.h | 1 -
 include/net/sock.h       | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index 36c2d998a43c..552bc25b1933 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -12,7 +12,6 @@ struct netns_core {
 	int	sysctl_somaxconn;
 
 #ifdef CONFIG_PROC_FS
-	int __percpu *sock_inuse;
 	struct prot_inuse __percpu *prot_inuse;
 #endif
 };
diff --git a/include/net/sock.h b/include/net/sock.h
index c4c981a51797..5589312531df 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1431,6 +1431,7 @@ proto_memory_pressure(struct proto *prot)
 #ifdef CONFIG_PROC_FS
 #define PROTO_INUSE_NR	64	/* should be enough for the first time */
 struct prot_inuse {
+	int all;
 	int val[PROTO_INUSE_NR];
 };
 /* Called with local bh disabled */
@@ -1442,7 +1443,7 @@ static inline void sock_prot_inuse_add(const struct net *net,
 
 static inline void sock_inuse_add(const struct net *net, int val)
 {
-	this_cpu_add(*net->core.sock_inuse, val);
+	this_cpu_add(net->core.prot_inuse->all, val);
 }
 
 int sock_prot_inuse_get(struct net *net, struct proto *proto);
-- 
cgit v1.2.3


From b3cb764aa1d753cf6a58858f9e2097ba71e8100b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 09:11:50 -0800
Subject: net: drop nopreempt requirement on sock_prot_inuse_add()

This is distracting really, let's make this simpler,
because many callers had to take care of this
by themselves, even if on x86 this adds more
code than really needed.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 5589312531df..f09c0c4736c4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1434,11 +1434,11 @@ struct prot_inuse {
 	int all;
 	int val[PROTO_INUSE_NR];
 };
-/* Called with local bh disabled */
+
 static inline void sock_prot_inuse_add(const struct net *net,
 				       const struct proto *prot, int val)
 {
-	__this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
+	this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
 }
 
 static inline void sock_inuse_add(const struct net *net, int val)
-- 
cgit v1.2.3


From 2bd1b237616bd91a3f4f0cd94dc53cd6cba7aff9 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Thu, 11 Nov 2021 16:48:42 -0800
Subject: Bluetooth: hci_sync: Convert MGMT_OP_SET_DISCOVERABLE to use cmd_sync

This makes MGMT_OP_SET_DISCOVERABLE use hci_cmd_sync_queue instead of
use a dedicated discoverable_update work.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 2 --
 include/net/bluetooth/hci_sync.h | 3 +++
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index b5f061882c10..5f57ff81b7b8 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -496,7 +496,6 @@ struct hci_dev {
 	struct work_struct	bg_scan_update;
 	struct work_struct	scan_update;
 	struct work_struct	connectable_update;
-	struct work_struct	discoverable_update;
 	struct delayed_work	le_scan_disable;
 	struct delayed_work	le_scan_restart;
 
@@ -1828,7 +1827,6 @@ void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr,
 void mgmt_smp_complete(struct hci_conn *conn, bool complete);
 bool mgmt_get_connectable(struct hci_dev *hdev);
 void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status);
-void mgmt_set_discoverable_complete(struct hci_dev *hdev, u8 status);
 u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev);
 void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev,
 			    u8 instance);
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 00b13e8ca800..d335c0ce8c5d 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -90,6 +90,9 @@ int hci_dev_close_sync(struct hci_dev *hdev);
 int hci_powered_update_sync(struct hci_dev *hdev);
 int hci_set_powered_sync(struct hci_dev *hdev, u8 val);
 
+int hci_update_discoverable_sync(struct hci_dev *hdev);
+int hci_update_discoverable(struct hci_dev *hdev);
+
 int hci_start_discovery_sync(struct hci_dev *hdev);
 int hci_stop_discovery_sync(struct hci_dev *hdev);
 
-- 
cgit v1.2.3


From f056a65783cce9c1279c1635d92768ce5962e4d6 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Thu, 11 Nov 2021 16:48:43 -0800
Subject: Bluetooth: hci_sync: Convert MGMT_OP_SET_CONNECTABLE to use cmd_sync

This makes MGMT_OP_SET_CONNEABLE use hci_cmd_sync_queue instead of
use a dedicated connetable_update work.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 2 --
 include/net/bluetooth/hci_sync.h | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 5f57ff81b7b8..acb46ae27b5a 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -495,7 +495,6 @@ struct hci_dev {
 	struct work_struct	discov_update;
 	struct work_struct	bg_scan_update;
 	struct work_struct	scan_update;
-	struct work_struct	connectable_update;
 	struct delayed_work	le_scan_disable;
 	struct delayed_work	le_scan_restart;
 
@@ -1826,7 +1825,6 @@ void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr,
 			 u16 max_interval, u16 latency, u16 timeout);
 void mgmt_smp_complete(struct hci_conn *conn, bool complete);
 bool mgmt_get_connectable(struct hci_dev *hdev);
-void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status);
 u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev);
 void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev,
 			    u8 instance);
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index d335c0ce8c5d..0336c1bc5d25 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -93,6 +93,8 @@ int hci_set_powered_sync(struct hci_dev *hdev, u8 val);
 int hci_update_discoverable_sync(struct hci_dev *hdev);
 int hci_update_discoverable(struct hci_dev *hdev);
 
+int hci_update_connectable_sync(struct hci_dev *hdev);
+
 int hci_start_discovery_sync(struct hci_dev *hdev);
 int hci_stop_discovery_sync(struct hci_dev *hdev);
 
-- 
cgit v1.2.3


From 9482c5074a7d0bec682cc35a12d687f185684f31 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Thu, 11 Nov 2021 16:48:44 -0800
Subject: Bluetooth: hci_request: Remove bg_scan_update work

This work is no longer necessary since all the code using it has been
converted to use hci_passive_scan/hci_passive_scan_sync.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index acb46ae27b5a..2560cfe80db8 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -493,7 +493,6 @@ struct hci_dev {
 	struct work_struct	tx_work;
 
 	struct work_struct	discov_update;
-	struct work_struct	bg_scan_update;
 	struct work_struct	scan_update;
 	struct delayed_work	le_scan_disable;
 	struct delayed_work	le_scan_restart;
-- 
cgit v1.2.3


From 0f281a5e5b673698c246da5d7bf0d5fa427c47c5 Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Thu, 11 Nov 2021 13:20:53 +0800
Subject: Bluetooth: Ignore HCI_ERROR_CANCELLED_BY_HOST on adv set terminated
 event

This event is received when the controller stops advertising,
specifically for these three reasons:
(a) Connection is successfully created (success).
(b) Timeout is reached (error).
(c) Number of advertising events is reached (error).
(*) This event is NOT generated when the host stops the advertisement.
Refer to the BT spec ver 5.3 vol 4 part E sec 7.7.65.18. Note that the
section was revised from BT spec ver 5.0 vol 2 part E sec 7.7.65.18
which was ambiguous about (*).

Some chips (e.g. RTL8822CE) send this event when the host stops the
advertisement with status = HCI_ERROR_CANCELLED_BY_HOST (due to (*)
above). This is treated as an error and the advertisement will be
removed and userspace will be informed via MGMT event.

On suspend, we are supposed to temporarily disable advertisements,
and continue advertising on resume. However, due to the behavior
above, the advertisements are removed instead.

This patch returns early if HCI_ERROR_CANCELLED_BY_HOST is received.

Btmon snippet of the unexpected behavior:
@ MGMT Command: Remove Advertising (0x003f) plen 1
        Instance: 1
< HCI Command: LE Set Extended Advertising Enable (0x08|0x0039) plen 6
        Extended advertising: Disabled (0x00)
        Number of sets: 1 (0x01)
        Entry 0
          Handle: 0x01
          Duration: 0 ms (0x00)
          Max ext adv events: 0
> HCI Event: LE Meta Event (0x3e) plen 6
      LE Advertising Set Terminated (0x12)
        Status: Operation Cancelled by Host (0x44)
        Handle: 1
        Connection handle: 0
        Number of completed extended advertising events: 5
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Advertising Enable (0x08|0x0039) ncmd 2
        Status: Success (0x00)

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 63065bc01b76..84db6b275231 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -566,6 +566,7 @@ enum {
 #define HCI_ERROR_INVALID_LL_PARAMS	0x1e
 #define HCI_ERROR_UNSPECIFIED		0x1f
 #define HCI_ERROR_ADVERTISING_TIMEOUT	0x3c
+#define HCI_ERROR_CANCELLED_BY_HOST	0x44
 
 /* Flow control modes */
 #define HCI_FLOW_CTL_MODE_PACKET_BASED	0x00
-- 
cgit v1.2.3


From 77d641baa3c8e18a1056bec6c64c6103c1a17b1e Mon Sep 17 00:00:00 2001
From: Luca Ceresoli <luca@lucaceresoli.net>
Date: Mon, 8 Nov 2021 17:27:05 +0100
Subject: power: supply: core: add POWER_SUPPLY_HEALTH_NO_BATTERY

Some chargers can keep the system powered from the mains even when no
battery is present. It this case none of the currently defined health
statuses applies. Add a new status to report that no battery is present.

Suggested-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Luca Ceresoli <luca@lucaceresoli.net>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 include/linux/power_supply.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 9ca1f120a211..2d1318fe2455 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -66,6 +66,7 @@ enum {
 	POWER_SUPPLY_HEALTH_WARM,
 	POWER_SUPPLY_HEALTH_COOL,
 	POWER_SUPPLY_HEALTH_HOT,
+	POWER_SUPPLY_HEALTH_NO_BATTERY,
 };
 
 enum {
-- 
cgit v1.2.3


From 451dc48c806a7ce9fbec5e7a24ccf4b2c936e834 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Thu, 11 Nov 2021 22:09:16 -0500
Subject: net: ieee802154: handle iftypes as u32

This patch fixes an issue that an u32 netlink value is handled as a
signed enum value which doesn't fit into the range of u32 netlink type.
If it's handled as -1 value some BIT() evaluation ends in a
shift-out-of-bounds issue. To solve the issue we set the to u32 max which
is s32 "-1" value to keep backwards compatibility and let the followed enum
values start counting at 0. This brings the compiler to never handle the
enum as signed and a check if the value is above NL802154_IFTYPE_MAX should
filter -1 out.

Fixes: f3ea5e44231a ("ieee802154: add new interface command")
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Link: https://lore.kernel.org/r/20211112030916.685793-1-aahringo@redhat.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 include/net/nl802154.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/nl802154.h b/include/net/nl802154.h
index ddcee128f5d9..145acb8f2509 100644
--- a/include/net/nl802154.h
+++ b/include/net/nl802154.h
@@ -19,6 +19,8 @@
  *
  */
 
+#include <linux/types.h>
+
 #define NL802154_GENL_NAME "nl802154"
 
 enum nl802154_commands {
@@ -150,10 +152,9 @@ enum nl802154_attrs {
 };
 
 enum nl802154_iftype {
-	/* for backwards compatibility TODO */
-	NL802154_IFTYPE_UNSPEC = -1,
+	NL802154_IFTYPE_UNSPEC = (~(__u32)0),
 
-	NL802154_IFTYPE_NODE,
+	NL802154_IFTYPE_NODE = 0,
 	NL802154_IFTYPE_MONITOR,
 	NL802154_IFTYPE_COORD,
 
-- 
cgit v1.2.3


From 83dde7498fefeb920b1def317421262317d178e5 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sun, 7 Nov 2021 08:40:47 +0200
Subject: RDMA/netlink: Add __maybe_unused to static inline in C file

Like other commits in the tree add __maybe_unused to a static inline in a
C file because some clang compilers will complain about unused code:

>> drivers/infiniband/core/nldev.c:2543:1: warning: unused function '__chk_RDMA_NL_NLDEV'
   MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
   ^

Fixes: e3bf14bdc17a ("rdma: Autoload netlink client modules")
Link: https://lore.kernel.org/r/4a8101919b765e01d7fde6f27fd572c958deeb4a.1636267207.git.leonro@nvidia.com
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/rdma/rdma_netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index 2758d9df71ee..c2a79aeee113 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -30,7 +30,7 @@ enum rdma_nl_flags {
  * constant as well and the compiler checks they are the same.
  */
 #define MODULE_ALIAS_RDMA_NETLINK(_index, _val)                                \
-	static inline void __chk_##_index(void)                                \
+	static inline void __maybe_unused __chk_##_index(void)                 \
 	{                                                                      \
 		BUILD_BUG_ON(_index != _val);                                  \
 	}                                                                      \
-- 
cgit v1.2.3


From d7751d6476185ff754b9dad2cba0c0a6e43ecadc Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Thu, 20 May 2021 17:09:58 +0300
Subject: net/mlx5: E-Switch, Fix resetting of encap mode when entering
 switchdev

E-Switch encap mode is relevant only when in switchdev mode.
The RDMA driver can query the encap configuration via
mlx5_eswitch_get_encap_mode(). Make sure it returns the currently
used mode and not the set one.

This reverts the cited commit which reset the encap mode
on entering switchdev and fixes the original issue properly.

Fixes: 9a64144d683a ("net/mlx5: E-Switch, Fix default encap mode")
Signed-off-by: Paul Blakey <paulb@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Maor Dickman <maord@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/eswitch.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 97afcea39a7b..8b18fe9771f9 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -145,13 +145,13 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
 	GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, \
 		ESW_TUN_OPTS_OFFSET + 1)
 
-u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev);
+u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev);
 u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
 struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw);
 
 #else  /* CONFIG_MLX5_ESWITCH */
 
-static inline u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev)
+static inline u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev)
 {
 	return MLX5_ESWITCH_NONE;
 }
-- 
cgit v1.2.3


From c2c60ea37e5b6be58c9dd7aff0b2e86ba0f18e0b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 09:23:01 -0800
Subject: once: use __section(".data.once")

.data.once contains nicely packed bool variables.
It is used already by DO_ONCE_LITE().

Using it also in DO_ONCE() removes holes in .data section.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/once.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/once.h b/include/linux/once.h
index d361fb14ac3a..f54523052bbc 100644
--- a/include/linux/once.h
+++ b/include/linux/once.h
@@ -38,7 +38,7 @@ void __do_once_done(bool *done, struct static_key_true *once_key,
 #define DO_ONCE(func, ...)						     \
 	({								     \
 		bool ___ret = false;					     \
-		static bool ___done = false;				     \
+		static bool __section(".data.once") ___done = false;	     \
 		static DEFINE_STATIC_KEY_TRUE(___once_key);		     \
 		if (static_branch_unlikely(&___once_key)) {		     \
 			unsigned long ___flags;				     \
-- 
cgit v1.2.3


From 7071732c26fe2cf141185ed16a8a85d02495ae8c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 09:23:02 -0800
Subject: net: use .data.once section in netdev_level_once()

Same rationale than prior patch : using the dedicated
section avoid holes and pack all these bool values.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 31a7e6b27681..dd328364dfe9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4942,7 +4942,7 @@ void netdev_info(const struct net_device *dev, const char *format, ...);
 
 #define netdev_level_once(level, dev, fmt, ...)			\
 do {								\
-	static bool __print_once __read_mostly;			\
+	static bool __section(".data.once") __print_once;	\
 								\
 	if (!__print_once) {					\
 		__print_once = true;				\
-- 
cgit v1.2.3


From 49ecc2e9c3abd269951972fa8b23a4d081111b80 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Nov 2021 09:23:03 -0800
Subject: net: align static siphash keys

siphash keys use 16 bytes.

Define siphash_aligned_key_t macro so that we can make sure they
are not crossing a cache line boundary.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/siphash.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/siphash.h b/include/linux/siphash.h
index bf21591a9e5e..3f7427b9e935 100644
--- a/include/linux/siphash.h
+++ b/include/linux/siphash.h
@@ -21,6 +21,8 @@ typedef struct {
 	u64 key[2];
 } siphash_key_t;
 
+#define siphash_aligned_key_t siphash_key_t __aligned(16)
+
 static inline bool siphash_key_is_zero(const siphash_key_t *key)
 {
 	return !(key->key[0] | key->key[1]);
-- 
cgit v1.2.3


From b9241f54138ca5af4d3c5ca6db56be83d7491508 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Mon, 15 Nov 2021 17:11:17 +0000
Subject: net: document SMII and correct phylink's new validation mechanism

SMII has not been documented in the kernel, but information on this PHY
interface mode has been recently found. Document it, and correct the
recently introduced phylink handling for this interface mode.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/E1mmfVl-0075nP-14@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 96e43fbb2dd8..1e57cdd95da3 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -99,7 +99,7 @@ extern const int phy_10gbit_features_array[1];
  * @PHY_INTERFACE_MODE_RGMII_RXID: RGMII with Internal RX delay
  * @PHY_INTERFACE_MODE_RGMII_TXID: RGMII with Internal RX delay
  * @PHY_INTERFACE_MODE_RTBI: Reduced TBI
- * @PHY_INTERFACE_MODE_SMII: ??? MII
+ * @PHY_INTERFACE_MODE_SMII: Serial MII
  * @PHY_INTERFACE_MODE_XGMII: 10 gigabit media-independent interface
  * @PHY_INTERFACE_MODE_XLGMII:40 gigabit media-independent interface
  * @PHY_INTERFACE_MODE_MOCA: Multimedia over Coax
-- 
cgit v1.2.3


From 5854d4a6cc356ba3e16d8593ac1c089a32d1759c Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Fri, 29 Oct 2021 20:26:12 +0300
Subject: mtd: spi-nor: Get rid of nor->page_size

nor->page_size duplicated what nor->params->page_size indicates
for no good reason. page_size is a flash parameter of fixed value
and it is better suited to be found in nor->params->page_size.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Reviewed-by: Pratyush Yadav <p.yadav@ti.com>
Reviewed-by: Michael Walle <michael@walle.cc>
Link: https://lore.kernel.org/r/20211029172633.886453-5-tudor.ambarus@microchip.com
---
 include/linux/mtd/spi-nor.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index f67457748ed8..fc90fce26e33 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -371,7 +371,6 @@ struct spi_nor_flash_parameter;
  * @bouncebuf_size:	size of the bounce buffer
  * @info:		SPI NOR part JEDEC MFR ID and other info
  * @manufacturer:	SPI NOR manufacturer
- * @page_size:		the page size of the SPI NOR
  * @addr_width:		number of address bytes
  * @erase_opcode:	the opcode for erasing a sector
  * @read_opcode:	the read opcode
@@ -401,7 +400,6 @@ struct spi_nor {
 	size_t			bouncebuf_size;
 	const struct flash_info	*info;
 	const struct spi_nor_manufacturer *manufacturer;
-	u32			page_size;
 	u8			addr_width;
 	u8			erase_opcode;
 	u8			read_opcode;
-- 
cgit v1.2.3


From 1e2c3ef0496e72ba9001da5fd1b7ed56ccb30597 Mon Sep 17 00:00:00 2001
From: Jens Wiklander <jens.wiklander@linaro.org>
Date: Mon, 4 Oct 2021 16:11:52 +0200
Subject: tee: export teedev_open() and teedev_close_context()

Exports the two functions teedev_open() and teedev_close_context() in
order to make it easier to create a driver internal struct tee_context.

Reviewed-by: Sumit Garg <sumit.garg@linaro.org>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 include/linux/tee_drv.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
index a1f03461369b..468a7d83dc6c 100644
--- a/include/linux/tee_drv.h
+++ b/include/linux/tee_drv.h
@@ -587,4 +587,18 @@ struct tee_client_driver {
 #define to_tee_client_driver(d) \
 		container_of(d, struct tee_client_driver, driver)
 
+/**
+ * teedev_open() - Open a struct tee_device
+ * @teedev:	Device to open
+ *
+ * @return a pointer to struct tee_context on success or an ERR_PTR on failure.
+ */
+struct tee_context *teedev_open(struct tee_device *teedev);
+
+/**
+ * teedev_close_context() - closes a struct tee_context
+ * @ctx:	The struct tee_context to close
+ */
+void teedev_close_context(struct tee_context *ctx);
+
 #endif /*__TEE_DRV_H*/
-- 
cgit v1.2.3


From 49c39ec4670a8f045729e3717af2e1a74caf89a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Mon, 13 Sep 2021 14:21:25 +0200
Subject: dma-buf: nuke dma_resv_get_excl_unlocked
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Heureka, that's finally not used any more.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20210917123513.1106-27-christian.koenig@amd.com
---
 include/linux/dma-resv.h | 26 --------------------------
 1 file changed, 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 09c6063b199a..eebf04325b34 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -440,32 +440,6 @@ dma_resv_excl_fence(struct dma_resv *obj)
 	return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj));
 }
 
-/**
- * dma_resv_get_excl_unlocked - get the reservation object's
- * exclusive fence, without lock held.
- * @obj: the reservation object
- *
- * If there is an exclusive fence, this atomically increments it's
- * reference count and returns it.
- *
- * RETURNS
- * The exclusive fence or NULL if none
- */
-static inline struct dma_fence *
-dma_resv_get_excl_unlocked(struct dma_resv *obj)
-{
-	struct dma_fence *fence;
-
-	if (!rcu_access_pointer(obj->fence_excl))
-		return NULL;
-
-	rcu_read_lock();
-	fence = dma_fence_get_rcu_safe(&obj->fence_excl);
-	rcu_read_unlock();
-
-	return fence;
-}
-
 /**
  * dma_resv_shared_list - get the reservation object's shared fence list
  * @obj: the reservation object
-- 
cgit v1.2.3


From 2fb75e1b642f49253d8848c9e47e8942f5366221 Mon Sep 17 00:00:00 2001
From: Liu Xinpeng <liuxp11@chinatelecom.cn>
Date: Mon, 25 Oct 2021 11:46:26 +0800
Subject: psi: Add a missing SPDX license header

Add the missing SPDX license header to
include/linux/psi.h
include/linux/psi_types.h
kernel/sched/psi.c

Signed-off-by: Liu Xinpeng <liuxp11@chinatelecom.cn>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lore.kernel.org/r/1635133586-84611-2-git-send-email-liuxp11@chinatelecom.cn
---
 include/linux/psi.h       | 1 +
 include/linux/psi_types.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/psi.h b/include/linux/psi.h
index 65eb1476ac70..a70ca833c6d7 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_PSI_H
 #define _LINUX_PSI_H
 
diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
index 0a23300d49af..bf50068d5d4b 100644
--- a/include/linux/psi_types.h
+++ b/include/linux/psi_types.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_PSI_TYPES_H
 #define _LINUX_PSI_TYPES_H
 
-- 
cgit v1.2.3


From 4feee7d12603deca8775f9f9ae5e121093837444 Mon Sep 17 00:00:00 2001
From: Josh Don <joshdon@google.com>
Date: Mon, 18 Oct 2021 13:34:28 -0700
Subject: sched/core: Forced idle accounting

Adds accounting for "forced idle" time, which is time where a cookie'd
task forces its SMT sibling to idle, despite the presence of runnable
tasks.

Forced idle time is one means to measure the cost of enabling core
scheduling (ie. the capacity lost due to the need to force idle).

Forced idle time is attributed to the thread responsible for causing
the forced idle.

A few details:
 - Forced idle time is displayed via /proc/PID/sched. It also requires
   that schedstats is enabled.
 - Forced idle is only accounted when a sibling hyperthread is held
   idle despite the presence of runnable tasks. No time is charged if
   a sibling is idle but has no runnable tasks.
 - Tasks with 0 cookie are never charged forced idle.
 - For SMT > 2, we scale the amount of forced idle charged based on the
   number of forced idle siblings. Additionally, we split the time up and
   evenly charge it to all running tasks, as each is equally responsible
   for the forced idle.

Signed-off-by: Josh Don <joshdon@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20211018203428.2025792-1-joshdon@google.com
---
 include/linux/sched.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78c351e35fec..d2e261adb8ea 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -523,7 +523,11 @@ struct sched_statistics {
 	u64				nr_wakeups_affine_attempts;
 	u64				nr_wakeups_passive;
 	u64				nr_wakeups_idle;
+
+#ifdef CONFIG_SCHED_CORE
+	u64				core_forceidle_sum;
 #endif
+#endif /* CONFIG_SCHEDSTATS */
 } ____cacheline_aligned;
 
 struct sched_entity {
-- 
cgit v1.2.3


From cb0e52b7748737b2cf6481fdd9b920ce7e1ebbdf Mon Sep 17 00:00:00 2001
From: Brian Chen <brianchen118@gmail.com>
Date: Wed, 10 Nov 2021 21:33:12 +0000
Subject: psi: Fix PSI_MEM_FULL state when tasks are in memstall and doing
 reclaim

We've noticed cases where tasks in a cgroup are stalled on memory but
there is little memory FULL pressure since tasks stay on the runqueue
in reclaim.

A simple example involves a single threaded program that keeps leaking
and touching large amounts of memory. It runs in a cgroup with swap
enabled, memory.high set at 10M and cpu.max ratio set at 5%. Though
there is significant CPU pressure and memory SOME, there is barely any
memory FULL since the task enters reclaim and stays on the runqueue.
However, this memory-bound task is effectively stalled on memory and
we expect memory FULL to match memory SOME in this scenario.

The code is confused about memstall && running, thinking there is a
stalled task and a productive task when there's only one task: a
reclaimer that's counted as both. To fix this, we redefine the
condition for PSI_MEM_FULL to check that all running tasks are in an
active memstall instead of checking that there are no running tasks.

        case PSI_MEM_FULL:
-               return unlikely(tasks[NR_MEMSTALL] && !tasks[NR_RUNNING]);
+               return unlikely(tasks[NR_MEMSTALL] &&
+                       tasks[NR_RUNNING] == tasks[NR_MEMSTALL_RUNNING]);

This will capture reclaimers. It will also capture tasks that called
psi_memstall_enter() and are about to sleep, but this should be
negligible noise.

Signed-off-by: Brian Chen <brianchen118@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lore.kernel.org/r/20211110213312.310243-1-brianchen118@gmail.com
---
 include/linux/psi_types.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
index bf50068d5d4b..516c0fe836fd 100644
--- a/include/linux/psi_types.h
+++ b/include/linux/psi_types.h
@@ -22,7 +22,17 @@ enum psi_task_count {
 	 * don't have to special case any state tracking for it.
 	 */
 	NR_ONCPU,
-	NR_PSI_TASK_COUNTS = 4,
+	/*
+	 * For IO and CPU stalls the presence of running/oncpu tasks
+	 * in the domain means a partial rather than a full stall.
+	 * For memory it's not so simple because of page reclaimers:
+	 * they are running/oncpu while representing a stall. To tell
+	 * whether a domain has productivity left or not, we need to
+	 * distinguish between regular running (i.e. productive)
+	 * threads and memstall ones.
+	 */
+	NR_MEMSTALL_RUNNING,
+	NR_PSI_TASK_COUNTS = 5,
 };
 
 /* Task state bitmasks */
@@ -30,6 +40,7 @@ enum psi_task_count {
 #define TSK_MEMSTALL	(1 << NR_MEMSTALL)
 #define TSK_RUNNING	(1 << NR_RUNNING)
 #define TSK_ONCPU	(1 << NR_ONCPU)
+#define TSK_MEMSTALL_RUNNING	(1 << NR_MEMSTALL_RUNNING)
 
 /* Resources that workloads could be stalled on */
 enum psi_res {
-- 
cgit v1.2.3


From ff083a2d972f56bebfd82409ca62e5dfce950961 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 11 Nov 2021 02:07:22 +0000
Subject: perf: Protect perf_guest_cbs with RCU

Protect perf_guest_cbs with RCU to fix multiple possible errors.  Luckily,
all paths that read perf_guest_cbs already require RCU protection, e.g. to
protect the callback chains, so only the direct perf_guest_cbs touchpoints
need to be modified.

Bug #1 is a simple lack of WRITE_ONCE/READ_ONCE behavior to ensure
perf_guest_cbs isn't reloaded between a !NULL check and a dereference.
Fixed via the READ_ONCE() in rcu_dereference().

Bug #2 is that on weakly-ordered architectures, updates to the callbacks
themselves are not guaranteed to be visible before the pointer is made
visible to readers.  Fixed by the smp_store_release() in
rcu_assign_pointer() when the new pointer is non-NULL.

Bug #3 is that, because the callbacks are global, it's possible for
readers to run in parallel with an unregisters, and thus a module
implementing the callbacks can be unloaded while readers are in flight,
resulting in a use-after-free.  Fixed by a synchronize_rcu() call when
unregistering callbacks.

Bug #1 escaped notice because it's extremely unlikely a compiler will
reload perf_guest_cbs in this sequence.  perf_guest_cbs does get reloaded
for future derefs, e.g. for ->is_user_mode(), but the ->is_in_guest()
guard all but guarantees the consumer will win the race, e.g. to nullify
perf_guest_cbs, KVM has to completely exit the guest and teardown down
all VMs before KVM start its module unload / unregister sequence.  This
also makes it all but impossible to encounter bug #3.

Bug #2 has not been a problem because all architectures that register
callbacks are strongly ordered and/or have a static set of callbacks.

But with help, unloading kvm_intel can trigger bug #1 e.g. wrapping
perf_guest_cbs with READ_ONCE in perf_misc_flags() while spamming
kvm_intel module load/unload leads to:

  BUG: kernel NULL pointer dereference, address: 0000000000000000
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  PGD 0 P4D 0
  Oops: 0000 [#1] PREEMPT SMP
  CPU: 6 PID: 1825 Comm: stress Not tainted 5.14.0-rc2+ #459
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
  RIP: 0010:perf_misc_flags+0x1c/0x70
  Call Trace:
   perf_prepare_sample+0x53/0x6b0
   perf_event_output_forward+0x67/0x160
   __perf_event_overflow+0x52/0xf0
   handle_pmi_common+0x207/0x300
   intel_pmu_handle_irq+0xcf/0x410
   perf_event_nmi_handler+0x28/0x50
   nmi_handle+0xc7/0x260
   default_do_nmi+0x6b/0x170
   exc_nmi+0x103/0x130
   asm_exc_nmi+0x76/0xbf

Fixes: 39447b386c84 ("perf: Enhance perf to allow for guest statistic collection from host")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20211111020738.2512932-2-seanjc@google.com
---
 include/linux/perf_event.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0dcfd265beed..318c489b735b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1240,7 +1240,18 @@ extern void perf_event_bpf_event(struct bpf_prog *prog,
 				 enum perf_bpf_event_type type,
 				 u16 flags);
 
-extern struct perf_guest_info_callbacks *perf_guest_cbs;
+extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
+static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void)
+{
+	/*
+	 * Callbacks are RCU-protected and must be READ_ONCE to avoid reloading
+	 * the callbacks between a !NULL check and dereferences, to ensure
+	 * pending stores/changes to the callback pointers are visible before a
+	 * non-NULL perf_guest_cbs is visible to readers, and to prevent a
+	 * module from unloading callbacks while readers are active.
+	 */
+	return rcu_dereference(perf_guest_cbs);
+}
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 
-- 
cgit v1.2.3


From 2934e3d09350c1a7ca2433fbeabfcd831e48a575 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 11 Nov 2021 02:07:25 +0000
Subject: perf: Stop pretending that perf can handle multiple guest callbacks

Drop the 'int' return value from the perf (un)register callbacks helpers
and stop pretending perf can support multiple callbacks.  The 'int'
returns are not future proofing anything as none of the callers take
action on an error.  It's also not obvious that there will ever be
co-tenant hypervisors, and if there are, that allowing multiple callbacks
to be registered is desirable or even correct.

Opportunistically rename callbacks=>cbs in the affected declarations to
match their definitions.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Link: https://lore.kernel.org/r/20211111020738.2512932-5-seanjc@google.com
---
 include/linux/perf_event.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 318c489b735b..98c204488496 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1252,8 +1252,8 @@ static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void)
 	 */
 	return rcu_dereference(perf_guest_cbs);
 }
-extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
-extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
+extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
+extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
 
 extern void perf_event_exec(void);
 extern void perf_event_comm(struct task_struct *tsk, bool exec);
@@ -1497,10 +1497,10 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
 static inline void
 perf_bp_event(struct perf_event *event, void *data)			{ }
 
-static inline int perf_register_guest_info_callbacks
-(struct perf_guest_info_callbacks *callbacks)				{ return 0; }
-static inline int perf_unregister_guest_info_callbacks
-(struct perf_guest_info_callbacks *callbacks)				{ return 0; }
+static inline void perf_register_guest_info_callbacks
+(struct perf_guest_info_callbacks *cbs)					{ }
+static inline void perf_unregister_guest_info_callbacks
+(struct perf_guest_info_callbacks *cbs)					{ }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 
-- 
cgit v1.2.3


From b9f5621c9547dd787900f005a9e1c3d5712de512 Mon Sep 17 00:00:00 2001
From: Like Xu <like.xu@linux.intel.com>
Date: Thu, 11 Nov 2021 02:07:27 +0000
Subject: perf/core: Rework guest callbacks to prepare for static_call support

To prepare for using static_calls to optimize perf's guest callbacks,
replace ->is_in_guest and ->is_user_mode with a new multiplexed hook
->state, tweak ->handle_intel_pt_intr to play nice with being called when
there is no active guest, and drop "guest" from ->get_guest_ip.

Return '0' from ->state and ->handle_intel_pt_intr to indicate "not in
guest" so that DEFINE_STATIC_CALL_RET0 can be used to define the static
calls, i.e. no callback == !guest.

[sean: extracted from static_call patch, fixed get_ip() bug, wrote changelog]
Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Originally-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Like Xu <like.xu@linux.intel.com>
Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Link: https://lore.kernel.org/r/20211111020738.2512932-7-seanjc@google.com
---
 include/linux/perf_event.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 98c204488496..5e6b346d62a7 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -26,11 +26,13 @@
 # include <asm/local64.h>
 #endif
 
+#define PERF_GUEST_ACTIVE	0x01
+#define PERF_GUEST_USER	0x02
+
 struct perf_guest_info_callbacks {
-	int				(*is_in_guest)(void);
-	int				(*is_user_mode)(void);
-	unsigned long			(*get_guest_ip)(void);
-	void				(*handle_intel_pt_intr)(void);
+	unsigned int			(*state)(void);
+	unsigned long			(*get_ip)(void);
+	unsigned int			(*handle_intel_pt_intr)(void);
 };
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-- 
cgit v1.2.3


From 1c3430516b0732d923de9fd3bfb3e2e537eeb235 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 11 Nov 2021 02:07:28 +0000
Subject: perf: Add wrappers for invoking guest callbacks

Add helpers for the guest callbacks to prepare for burying the callbacks
behind a Kconfig (it's a lot easier to provide a few stubs than to #ifdef
piles of code), and also to prepare for converting the callbacks to
static_call().  perf_instruction_pointer() in particular will have subtle
semantics with static_call(), as the "no callbacks" case will return 0 if
the callbacks are unregistered between querying guest state and getting
the IP.  Implement the change now to avoid a functional change when adding
static_call() support, and because the new helper needs to return
_something_ in this case.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Link: https://lore.kernel.org/r/20211111020738.2512932-8-seanjc@google.com
---
 include/linux/perf_event.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5e6b346d62a7..346d5aff5804 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1254,6 +1254,30 @@ static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void)
 	 */
 	return rcu_dereference(perf_guest_cbs);
 }
+static inline unsigned int perf_guest_state(void)
+{
+	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+
+	return guest_cbs ? guest_cbs->state() : 0;
+}
+static inline unsigned long perf_guest_get_ip(void)
+{
+	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+
+	/*
+	 * Arbitrarily return '0' in the unlikely scenario that the callbacks
+	 * are unregistered between checking guest state and getting the IP.
+	 */
+	return guest_cbs ? guest_cbs->get_ip() : 0;
+}
+static inline unsigned int perf_guest_handle_intel_pt_intr(void)
+{
+	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+
+	if (guest_cbs && guest_cbs->handle_intel_pt_intr)
+		return guest_cbs->handle_intel_pt_intr();
+	return 0;
+}
 extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
 extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
 
-- 
cgit v1.2.3


From 2aef6f306b39bbe74e2287d6e2ee07c4867d87d0 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 11 Nov 2021 02:07:29 +0000
Subject: perf: Force architectures to opt-in to guest callbacks

Introduce GUEST_PERF_EVENTS and require architectures to select it to
allow registering and using guest callbacks in perf.  This will hopefully
make it more difficult for new architectures to add useless "support" for
guest callbacks, e.g. via copy+paste.

Stubbing out the helpers has the happy bonus of avoiding a load of
perf_guest_cbs when GUEST_PERF_EVENTS=n on arm64/x86.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Link: https://lore.kernel.org/r/20211111020738.2512932-9-seanjc@google.com
---
 include/linux/perf_event.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 346d5aff5804..ea47ef616ee0 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1242,6 +1242,7 @@ extern void perf_event_bpf_event(struct bpf_prog *prog,
 				 enum perf_bpf_event_type type,
 				 u16 flags);
 
+#ifdef CONFIG_GUEST_PERF_EVENTS
 extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
 static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void)
 {
@@ -1280,6 +1281,11 @@ static inline unsigned int perf_guest_handle_intel_pt_intr(void)
 }
 extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
 extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
+#else
+static inline unsigned int perf_guest_state(void)		 { return 0; }
+static inline unsigned long perf_guest_get_ip(void)		 { return 0; }
+static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; }
+#endif /* CONFIG_GUEST_PERF_EVENTS */
 
 extern void perf_event_exec(void);
 extern void perf_event_comm(struct task_struct *tsk, bool exec);
-- 
cgit v1.2.3


From 87b940a0675e25261f022ac3e53e0dfff9cdb995 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 11 Nov 2021 02:07:30 +0000
Subject: perf/core: Use static_call to optimize perf_guest_info_callbacks

Use static_call to optimize perf's guest callbacks on arm64 and x86,
which are now the only architectures that define the callbacks.  Use
DEFINE_STATIC_CALL_RET0 as the default/NULL for all guest callbacks, as
the callback semantics are that a return value '0' means "not in guest".

static_call obviously avoids the overhead of CONFIG_RETPOLINE=y, but is
also advantageous versus other solutions, e.g. per-cpu callbacks, in that
a per-cpu memory load is not needed to detect the !guest case.

Based on code from Peter and Like.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Link: https://lore.kernel.org/r/20211111020738.2512932-10-seanjc@google.com
---
 include/linux/perf_event.h | 34 ++++++++--------------------------
 1 file changed, 8 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ea47ef616ee0..0ac7d867ca0c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1244,40 +1244,22 @@ extern void perf_event_bpf_event(struct bpf_prog *prog,
 
 #ifdef CONFIG_GUEST_PERF_EVENTS
 extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
-static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void)
-{
-	/*
-	 * Callbacks are RCU-protected and must be READ_ONCE to avoid reloading
-	 * the callbacks between a !NULL check and dereferences, to ensure
-	 * pending stores/changes to the callback pointers are visible before a
-	 * non-NULL perf_guest_cbs is visible to readers, and to prevent a
-	 * module from unloading callbacks while readers are active.
-	 */
-	return rcu_dereference(perf_guest_cbs);
-}
+
+DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
+DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
+DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
+
 static inline unsigned int perf_guest_state(void)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
-
-	return guest_cbs ? guest_cbs->state() : 0;
+	return static_call(__perf_guest_state)();
 }
 static inline unsigned long perf_guest_get_ip(void)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
-
-	/*
-	 * Arbitrarily return '0' in the unlikely scenario that the callbacks
-	 * are unregistered between checking guest state and getting the IP.
-	 */
-	return guest_cbs ? guest_cbs->get_ip() : 0;
+	return static_call(__perf_guest_get_ip)();
 }
 static inline unsigned int perf_guest_handle_intel_pt_intr(void)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
-
-	if (guest_cbs && guest_cbs->handle_intel_pt_intr)
-		return guest_cbs->handle_intel_pt_intr();
-	return 0;
+	return static_call(__perf_guest_handle_intel_pt_intr)();
 }
 extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
 extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
-- 
cgit v1.2.3


From e1bfc24577cc65c95dc519d7621a9c985b97e567 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 11 Nov 2021 02:07:33 +0000
Subject: KVM: Move x86's perf guest info callbacks to generic KVM

Move x86's perf guest callbacks into common KVM, as they are semantically
identical to arm64's callbacks (the only other such KVM callbacks).
arm64 will convert to the common versions in a future patch.

Implement the necessary arm64 arch hooks now to avoid having to provide
stubs or a temporary #define (from x86) to avoid arm64 compilation errors
when CONFIG_GUEST_PERF_EVENTS=y.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20211111020738.2512932-13-seanjc@google.com
---
 include/linux/kvm_host.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9e0667e3723e..9df7ab2d7530 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1170,6 +1170,16 @@ static inline bool kvm_arch_intc_initialized(struct kvm *kvm)
 }
 #endif
 
+#ifdef CONFIG_GUEST_PERF_EVENTS
+unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu);
+
+void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void));
+void kvm_unregister_perf_callbacks(void);
+#else
+static inline void kvm_register_perf_callbacks(void *ign) {}
+static inline void kvm_unregister_perf_callbacks(void) {}
+#endif /* CONFIG_GUEST_PERF_EVENTS */
+
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
 void kvm_arch_destroy_vm(struct kvm *kvm);
 void kvm_arch_sync_events(struct kvm *kvm);
-- 
cgit v1.2.3


From be399d824b432a85f8df86b566d2e5994fdf58b0 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 11 Nov 2021 02:07:36 +0000
Subject: KVM: arm64: Hide kvm_arm_pmu_available behind CONFIG_HW_PERF_EVENTS=y

Move the definition of kvm_arm_pmu_available to pmu-emul.c and, out of
"necessity", hide it behind CONFIG_HW_PERF_EVENTS.  Provide a stub for
the key's wrapper, kvm_arm_support_pmu_v3().  Moving the key's definition
out of perf.c will allow a future commit to delete perf.c entirely.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211111020738.2512932-16-seanjc@google.com
---
 include/kvm/arm_pmu.h | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 90f21898aad8..f9ed4c171d7b 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -13,13 +13,6 @@
 #define ARMV8_PMU_CYCLE_IDX		(ARMV8_PMU_MAX_COUNTERS - 1)
 #define ARMV8_PMU_MAX_COUNTER_PAIRS	((ARMV8_PMU_MAX_COUNTERS + 1) >> 1)
 
-DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
-
-static __always_inline bool kvm_arm_support_pmu_v3(void)
-{
-	return static_branch_likely(&kvm_arm_pmu_available);
-}
-
 #ifdef CONFIG_HW_PERF_EVENTS
 
 struct kvm_pmc {
@@ -36,6 +29,13 @@ struct kvm_pmu {
 	struct irq_work overflow_work;
 };
 
+DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
+
+static __always_inline bool kvm_arm_support_pmu_v3(void)
+{
+	return static_branch_likely(&kvm_arm_pmu_available);
+}
+
 #define kvm_arm_pmu_irq_initialized(v)	((v)->arch.pmu.irq_num >= VGIC_NR_SGIS)
 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx);
 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
@@ -65,6 +65,11 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu);
 struct kvm_pmu {
 };
 
+static inline bool kvm_arm_support_pmu_v3(void)
+{
+	return false;
+}
+
 #define kvm_arm_pmu_irq_initialized(v)	(false)
 static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
 					    u64 select_idx)
-- 
cgit v1.2.3


From a9f4a6e92b3b319296fb078da2615f618f6cd80c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 11 Nov 2021 02:07:38 +0000
Subject: perf: Drop guest callback (un)register stubs

Drop perf's stubs for (un)registering guest callbacks now that KVM
registration of callbacks is hidden behind GUEST_PERF_EVENTS=y.  The only
other user is x86 XEN_PV, and x86 unconditionally selects PERF_EVENTS.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Link: https://lore.kernel.org/r/20211111020738.2512932-18-seanjc@google.com
---
 include/linux/perf_event.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0ac7d867ca0c..7b7525e9155f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1511,11 +1511,6 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
 static inline void
 perf_bp_event(struct perf_event *event, void *data)			{ }
 
-static inline void perf_register_guest_info_callbacks
-(struct perf_guest_info_callbacks *cbs)					{ }
-static inline void perf_unregister_guest_info_callbacks
-(struct perf_guest_info_callbacks *cbs)					{ }
-
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 
 typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
-- 
cgit v1.2.3


From f45b2974cc0ae959a4c503a071e38a56bd64372f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= <bjorn@kernel.org>
Date: Wed, 17 Nov 2021 13:57:08 +0100
Subject: bpf, x86: Fix "no previous prototype" warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The arch_prepare_bpf_dispatcher function does not have a prototype, and
yields the following warning when W=1 is enabled for the kernel build.

  >> arch/x86/net/bpf_jit_comp.c:2188:5: warning: no previous \
  prototype for 'arch_prepare_bpf_dispatcher' [-Wmissing-prototypes]
        2188 | int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, \
	int num_funcs)
             |     ^~~~~~~~~~~~~~~~~~~~~~~~~~~

Remove the warning by adding a function declaration to include/linux/bpf.h.

Fixes: 75ccbef6369e ("bpf: Introduce BPF dispatcher")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Björn Töpel <bjorn@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20211117125708.769168-1-bjorn@kernel.org
---
 include/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e7a163a3146b..84ff6ef49462 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -732,6 +732,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
 struct bpf_trampoline *bpf_trampoline_get(u64 key,
 					  struct bpf_attach_target_info *tgt_info);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
+int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs);
 #define BPF_DISPATCHER_INIT(_name) {				\
 	.mutex = __MUTEX_INITIALIZER(_name.mutex),		\
 	.func = &_name##_func,					\
-- 
cgit v1.2.3


From cf9acc90c80ecbee00334aa85d92f4e74014bcff Mon Sep 17 00:00:00 2001
From: Jonathan Davies <jonathan.davies@nutanix.com>
Date: Tue, 16 Nov 2021 17:42:42 +0000
Subject: net: virtio_net_hdr_to_skb: count transport header in UFO

virtio_net_hdr_to_skb does not set the skb's gso_size and gso_type
correctly for UFO packets received via virtio-net that are a little over
the GSO size. This can lead to problems elsewhere in the networking
stack, e.g. ovs_vport_send dropping over-sized packets if gso_size is
not set.

This is due to the comparison

  if (skb->len - p_off > gso_size)

not properly accounting for the transport layer header.

p_off includes the size of the transport layer header (thlen), so
skb->len - p_off is the size of the TCP/UDP payload.

gso_size is read from the virtio-net header. For UFO, fragmentation
happens at the IP level so does not need to include the UDP header.

Hence the calculation could be comparing a TCP/UDP payload length with
an IP payload length, causing legitimate virtio-net packets to have
lack gso_type/gso_size information.

Example: a UDP packet with payload size 1473 has IP payload size 1481.
If the guest used UFO, it is not fragmented and the virtio-net header's
flags indicate that it is a GSO frame (VIRTIO_NET_HDR_GSO_UDP), with
gso_size = 1480 for an MTU of 1500.  skb->len will be 1515 and p_off
will be 42, so skb->len - p_off = 1473.  Hence the comparison fails, and
shinfo->gso_size and gso_type are not set as they should be.

Instead, add the UDP header length before comparing to gso_size when
using UFO. In this way, it is the size of the IP payload that is
compared to gso_size.

Fixes: 6dd912f82680 ("net: check untrusted gso_size at kernel entry")
Signed-off-by: Jonathan Davies <jonathan.davies@nutanix.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_net.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index b465f8f3e554..04e87f4b9417 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -120,10 +120,15 @@ retry:
 
 	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
 		u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size);
+		unsigned int nh_off = p_off;
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 
+		/* UFO may not include transport header in gso_size. */
+		if (gso_type & SKB_GSO_UDP)
+			nh_off -= thlen;
+
 		/* Too small packets are not really GSO ones. */
-		if (skb->len - p_off > gso_size) {
+		if (skb->len - nh_off > gso_size) {
 			shinfo->gso_size = gso_size;
 			shinfo->gso_type = gso_type;
 
-- 
cgit v1.2.3


From 8160fb43d55d26d64607fd32fe69185a5f5fe41f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 16 Nov 2021 19:29:21 -0800
Subject: net: use an atomic_long_t for queue->trans_timeout

tx_timeout_show() assumed dev_watchdog() would stop all
the queues, to fetch queue->trans_timeout under protection
of the queue->_xmit_lock.

As we want to no longer disrupt transmits, we use an
atomic_long_t instead.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: david decotigny <david.decotigny@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dd328364dfe9..1d22483cf78c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -592,7 +592,7 @@ struct netdev_queue {
 	 * Number of TX timeouts for this queue
 	 * (/sys/class/net/DEV/Q/trans_timeout)
 	 */
-	unsigned long		trans_timeout;
+	atomic_long_t		trans_timeout;
 
 	/* Subordinate device that the queue has been assigned to */
 	struct net_device	*sb_dev;
-- 
cgit v1.2.3


From 5337824f4dc4bb26f38fbbba4ffb425a92803f15 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 16 Nov 2021 19:29:22 -0800
Subject: net: annotate accesses to queue->trans_start

In following patches, dev_watchdog() will no longer stop all queues.
It will read queue->trans_start locklessly.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1d22483cf78c..279409ef2b18 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4095,10 +4095,21 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
 	spin_unlock_bh(&txq->_xmit_lock);
 }
 
+/*
+ * txq->trans_start can be read locklessly from dev_watchdog()
+ */
 static inline void txq_trans_update(struct netdev_queue *txq)
 {
 	if (txq->xmit_lock_owner != -1)
-		txq->trans_start = jiffies;
+		WRITE_ONCE(txq->trans_start, jiffies);
+}
+
+static inline void txq_trans_cond_update(struct netdev_queue *txq)
+{
+	unsigned long now = jiffies;
+
+	if (READ_ONCE(txq->trans_start) != now)
+		WRITE_ONCE(txq->trans_start, now);
 }
 
 /* legacy drivers only, netdev_start_xmit() sets txq->trans_start */
@@ -4106,8 +4117,7 @@ static inline void netif_trans_update(struct net_device *dev)
 {
 	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
 
-	if (txq->trans_start != jiffies)
-		txq->trans_start = jiffies;
+	txq_trans_cond_update(txq);
 }
 
 /**
-- 
cgit v1.2.3


From dab8fe320726b38a6b1dc6a7ca6e386c5f7779e8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 16 Nov 2021 19:29:23 -0800
Subject: net: do not inline netif_tx_lock()/netif_tx_unlock()

These are not fast path, there is no point in inlining them.

Also provide netif_freeze_queues()/netif_unfreeze_queues()
so that we can use them from dev_watchdog() in the following patch.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 39 ++-------------------------------------
 1 file changed, 2 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 279409ef2b18..4f4a299e92de 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4126,27 +4126,7 @@ static inline void netif_trans_update(struct net_device *dev)
  *
  * Get network device transmit lock
  */
-static inline void netif_tx_lock(struct net_device *dev)
-{
-	unsigned int i;
-	int cpu;
-
-	spin_lock(&dev->tx_global_lock);
-	cpu = smp_processor_id();
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-
-		/* We are the only thread of execution doing a
-		 * freeze, but we have to grab the _xmit_lock in
-		 * order to synchronize with threads which are in
-		 * the ->hard_start_xmit() handler and already
-		 * checked the frozen bit.
-		 */
-		__netif_tx_lock(txq, cpu);
-		set_bit(__QUEUE_STATE_FROZEN, &txq->state);
-		__netif_tx_unlock(txq);
-	}
-}
+void netif_tx_lock(struct net_device *dev);
 
 static inline void netif_tx_lock_bh(struct net_device *dev)
 {
@@ -4154,22 +4134,7 @@ static inline void netif_tx_lock_bh(struct net_device *dev)
 	netif_tx_lock(dev);
 }
 
-static inline void netif_tx_unlock(struct net_device *dev)
-{
-	unsigned int i;
-
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-
-		/* No need to grab the _xmit_lock here.  If the
-		 * queue is not stopped for another reason, we
-		 * force a schedule.
-		 */
-		clear_bit(__QUEUE_STATE_FROZEN, &txq->state);
-		netif_schedule_queue(txq);
-	}
-	spin_unlock(&dev->tx_global_lock);
-}
+void netif_tx_unlock(struct net_device *dev);
 
 static inline void netif_tx_unlock_bh(struct net_device *dev)
 {
-- 
cgit v1.2.3


From 1881eadb2041889d74d60c074eb04189c4a07dad Mon Sep 17 00:00:00 2001
From: Abhyuday Godhasara <abhyuday.godhasara@xilinx.com>
Date: Mon, 25 Oct 2021 21:25:20 -0700
Subject: firmware: xilinx: add register notifier in zynqmp firmware

In zynqmp-firmware, register notifier is not supported, add support of
register notifier in zynqmp-firmware.

Acked-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Tejas Patel <tejas.patel@xilinx.com>
Signed-off-by: Abhyuday Godhasara <abhyuday.godhasara@xilinx.com>
Link: https://lore.kernel.org/r/20211026042525.26612-2-abhyuday.godhasara@xilinx.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/firmware/xlnx-zynqmp.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 47fd4e52a423..d30d39dc8cb4 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -2,7 +2,7 @@
 /*
  * Xilinx Zynq MPSoC Firmware layer
  *
- *  Copyright (C) 2014-2019 Xilinx
+ *  Copyright (C) 2014-2021 Xilinx
  *
  *  Michal Simek <michal.simek@xilinx.com>
  *  Davorin Mista <davorin.mista@aggios.com>
@@ -66,6 +66,7 @@
 
 enum pm_api_id {
 	PM_GET_API_VERSION = 1,
+	PM_REGISTER_NOTIFIER = 5,
 	PM_SYSTEM_SHUTDOWN = 12,
 	PM_REQUEST_NODE = 13,
 	PM_RELEASE_NODE = 14,
@@ -427,6 +428,8 @@ int zynqmp_pm_pinctrl_get_config(const u32 pin, const u32 param,
 int zynqmp_pm_pinctrl_set_config(const u32 pin, const u32 param,
 				 u32 value);
 int zynqmp_pm_load_pdi(const u32 src, const u64 address);
+int zynqmp_pm_register_notifier(const u32 node, const u32 event,
+				const u32 wake, const u32 enable);
 #else
 static inline int zynqmp_pm_get_api_version(u32 *version)
 {
@@ -658,6 +661,12 @@ static inline int zynqmp_pm_load_pdi(const u32 src, const u64 address)
 {
 	return -ENODEV;
 }
+
+static inline int zynqmp_pm_register_notifier(const u32 node, const u32 event,
+					      const u32 wake, const u32 enable)
+{
+	return -ENODEV;
+}
 #endif
 
 #endif /* __FIRMWARE_ZYNQMP_H__ */
-- 
cgit v1.2.3


From fbce9f14055e547d270046f61758c29c957e675d Mon Sep 17 00:00:00 2001
From: Abhyuday Godhasara <abhyuday.godhasara@xilinx.com>
Date: Mon, 25 Oct 2021 21:25:21 -0700
Subject: firmware: xilinx: add macros of node ids for error event

Add macros for the Node-Id of Error events.

Move supported api callback ids from zynqmp-power to zynqmp-firmware.

Acked-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Rajan Vaja <rajan.vaja@xilinx.com>
Signed-off-by: Abhyuday Godhasara <abhyuday.godhasara@xilinx.com>
Link: https://lore.kernel.org/r/20211026042525.26612-3-abhyuday.godhasara@xilinx.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/firmware/xlnx-zynqmp.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index d30d39dc8cb4..b0a38091db71 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -64,6 +64,20 @@
 #define XILINX_ZYNQMP_PM_FPGA_FULL	0x0U
 #define XILINX_ZYNQMP_PM_FPGA_PARTIAL	BIT(0)
 
+/*
+ * Node IDs for the Error Events.
+ */
+#define EVENT_ERROR_PMC_ERR1	(0x28100000U)
+#define EVENT_ERROR_PMC_ERR2	(0x28104000U)
+#define EVENT_ERROR_PSM_ERR1	(0x28108000U)
+#define EVENT_ERROR_PSM_ERR2	(0x2810C000U)
+
+enum pm_api_cb_id {
+	PM_INIT_SUSPEND_CB = 30,
+	PM_ACKNOWLEDGE_CB = 31,
+	PM_NOTIFY_CB = 32,
+};
+
 enum pm_api_id {
 	PM_GET_API_VERSION = 1,
 	PM_REGISTER_NOTIFIER = 5,
-- 
cgit v1.2.3


From f4d77525679e289d4976ca03b620ac4cc5403205 Mon Sep 17 00:00:00 2001
From: Abhyuday Godhasara <abhyuday.godhasara@xilinx.com>
Date: Mon, 25 Oct 2021 21:25:22 -0700
Subject: firmware: xilinx: export the feature check of zynqmp firmware

Export the zynqmp_pm_feature(), so it can be use by other as to get API
version available in firmware.

Acked-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Rajan Vaja <rajan.vaja@xilinx.com>
Signed-off-by: Abhyuday Godhasara <abhyuday.godhasara@xilinx.com>
Link: https://lore.kernel.org/r/20211026042525.26612-4-abhyuday.godhasara@xilinx.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/firmware/xlnx-zynqmp.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index b0a38091db71..077e894bb340 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -444,6 +444,7 @@ int zynqmp_pm_pinctrl_set_config(const u32 pin, const u32 param,
 int zynqmp_pm_load_pdi(const u32 src, const u64 address);
 int zynqmp_pm_register_notifier(const u32 node, const u32 event,
 				const u32 wake, const u32 enable);
+int zynqmp_pm_feature(const u32 api_id);
 #else
 static inline int zynqmp_pm_get_api_version(u32 *version)
 {
@@ -681,6 +682,11 @@ static inline int zynqmp_pm_register_notifier(const u32 node, const u32 event,
 {
 	return -ENODEV;
 }
+
+static inline int zynqmp_pm_feature(const u32 api_id)
+{
+	return -ENODEV;
+}
 #endif
 
 #endif /* __FIRMWARE_ZYNQMP_H__ */
-- 
cgit v1.2.3


From 522a0032af005502507f5f81ae64fdcc82b5d068 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 6 Nov 2021 17:13:35 -0400
Subject: Add linux/cacheflush.h

Many architectures do not include asm-generic/cacheflush.h, so turn
the includes on their head and add linux/cacheflush.h which includes
asm/cacheflush.h.

Move the flush_dcache_folio() declaration from asm-generic/cacheflush.h
to linux/cacheflush.h and change linux/highmem.h to include
linux/cacheflush.h instead of asm/cacheflush.h so that all necessary
places will see flush_dcache_folio().

More functions should have their default implementations moved in the
future, but those are for follow-on patches.  This fixes csky, sparc and
sparc64 which were missed in the commit which added flush_dcache_folio().

Fixes: 08b0b0059bf1 ("mm: Add flush_dcache_folio()")
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 include/asm-generic/cacheflush.h |  6 ------
 include/linux/cacheflush.h       | 18 ++++++++++++++++++
 include/linux/highmem.h          |  3 +--
 3 files changed, 19 insertions(+), 8 deletions(-)
 create mode 100644 include/linux/cacheflush.h

(limited to 'include')

diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h
index fedc0dfa4877..4f07afacbc23 100644
--- a/include/asm-generic/cacheflush.h
+++ b/include/asm-generic/cacheflush.h
@@ -50,13 +50,7 @@ static inline void flush_dcache_page(struct page *page)
 {
 }
 
-static inline void flush_dcache_folio(struct folio *folio) { }
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
-#endif
-
-#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
-void flush_dcache_folio(struct folio *folio);
 #endif
 
 #ifndef flush_dcache_mmap_lock
diff --git a/include/linux/cacheflush.h b/include/linux/cacheflush.h
new file mode 100644
index 000000000000..fef8b607f97e
--- /dev/null
+++ b/include/linux/cacheflush.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CACHEFLUSH_H
+#define _LINUX_CACHEFLUSH_H
+
+#include <asm/cacheflush.h>
+
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
+void flush_dcache_folio(struct folio *folio);
+#endif
+#else
+static inline void flush_dcache_folio(struct folio *folio)
+{
+}
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO 0
+#endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */
+
+#endif /* _LINUX_CACHEFLUSH_H */
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 25aff0f2ed0b..c944b3b70ee7 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -5,12 +5,11 @@
 #include <linux/fs.h>
 #include <linux/kernel.h>
 #include <linux/bug.h>
+#include <linux/cacheflush.h>
 #include <linux/mm.h>
 #include <linux/uaccess.h>
 #include <linux/hardirq.h>
 
-#include <asm/cacheflush.h>
-
 #include "highmem-internal.h"
 
 /**
-- 
cgit v1.2.3


From 9c3252152e8a6401c2b9e32490a5a16ec4472778 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 16 Nov 2021 21:17:14 -0500
Subject: mm: Rename folio_test_multi to folio_test_large

This is a better name.  Also add kernel-doc.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/page-flags.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 52ec4b5e5615..05510118fbb8 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -692,7 +692,13 @@ static inline bool folio_test_single(struct folio *folio)
 	return !folio_test_head(folio);
 }
 
-static inline bool folio_test_multi(struct folio *folio)
+/**
+ * folio_test_large() - Does this folio contain more than one page?
+ * @folio: The folio to test.
+ *
+ * Return: True if the folio is larger than one page.
+ */
+static inline bool folio_test_large(struct folio *folio)
 {
 	return folio_test_head(folio);
 }
-- 
cgit v1.2.3


From a1efe484dd8c04c4c2d4eb1ee6b04d01cfc07ccc Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 16 Nov 2021 21:18:52 -0500
Subject: mm: Remove folio_test_single

There's no need for this predicate; callers can just use
!folio_test_large().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/page-flags.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 05510118fbb8..b5f14d581113 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -686,12 +686,6 @@ static inline bool test_set_page_writeback(struct page *page)
 
 __PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY)
 
-/* Whether there are one or multiple pages in a folio */
-static inline bool folio_test_single(struct folio *folio)
-{
-	return !folio_test_head(folio);
-}
-
 /**
  * folio_test_large() - Does this folio contain more than one page?
  * @folio: The folio to test.
-- 
cgit v1.2.3


From ff36da69bc90d80b0c73f47f4b2e270b3ff6da99 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 29 Aug 2021 06:07:03 -0400
Subject: fs: Remove FS_THP_SUPPORT

Instead of setting a bit in the fs_flags to set a bit in the
address_space, set the bit in the address_space directly.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 include/linux/fs.h      |  1 -
 include/linux/pagemap.h | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1cb616fc1105..bbf812ce89a8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2518,7 +2518,6 @@ struct file_system_type {
 #define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
 #define FS_DISALLOW_NOTIFY_PERM	16	/* Disable fanotify permission events */
 #define FS_ALLOW_IDMAP         32      /* FS has been updated to handle vfs idmappings. */
-#define FS_THP_SUPPORT		8192	/* Remove once all fs converted */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
 	int (*init_fs_context)(struct fs_context *);
 	const struct fs_parameter_spec *parameters;
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 1a0c646eb6ff..9e33878bf23b 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -176,6 +176,22 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
 	m->gfp_mask = mask;
 }
 
+/**
+ * mapping_set_large_folios() - Indicate the file supports large folios.
+ * @mapping: The file.
+ *
+ * The filesystem should call this function in its inode constructor to
+ * indicate that the VFS can use large folios to cache the contents of
+ * the file.
+ *
+ * Context: This should not be called while the inode is active as it
+ * is non-atomic.
+ */
+static inline void mapping_set_large_folios(struct address_space *mapping)
+{
+	__set_bit(AS_THP_SUPPORT, &mapping->flags);
+}
+
 static inline bool mapping_thp_support(struct address_space *mapping)
 {
 	return test_bit(AS_THP_SUPPORT, &mapping->flags);
-- 
cgit v1.2.3


From ed2145c474c9015bc634e35f6d1a9b7767f3fbfc Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 29 Aug 2021 06:28:19 -0400
Subject: fs: Rename AS_THP_SUPPORT and mapping_thp_support

These are now indicators of large folio support, not THP support.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/pagemap.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 9e33878bf23b..605246452305 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -84,7 +84,7 @@ enum mapping_flags {
 	AS_EXITING	= 4, 	/* final truncate in progress */
 	/* writeback related tags are not used */
 	AS_NO_WRITEBACK_TAGS = 5,
-	AS_THP_SUPPORT = 6,	/* THPs supported */
+	AS_LARGE_FOLIO_SUPPORT = 6,
 };
 
 /**
@@ -189,12 +189,12 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
  */
 static inline void mapping_set_large_folios(struct address_space *mapping)
 {
-	__set_bit(AS_THP_SUPPORT, &mapping->flags);
+	__set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
 }
 
-static inline bool mapping_thp_support(struct address_space *mapping)
+static inline bool mapping_large_folio_support(struct address_space *mapping)
 {
-	return test_bit(AS_THP_SUPPORT, &mapping->flags);
+	return test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
 }
 
 static inline int filemap_nr_thps(struct address_space *mapping)
@@ -209,7 +209,7 @@ static inline int filemap_nr_thps(struct address_space *mapping)
 static inline void filemap_nr_thps_inc(struct address_space *mapping)
 {
 #ifdef CONFIG_READ_ONLY_THP_FOR_FS
-	if (!mapping_thp_support(mapping))
+	if (!mapping_large_folio_support(mapping))
 		atomic_inc(&mapping->nr_thps);
 #else
 	WARN_ON_ONCE(1);
@@ -219,7 +219,7 @@ static inline void filemap_nr_thps_inc(struct address_space *mapping)
 static inline void filemap_nr_thps_dec(struct address_space *mapping)
 {
 #ifdef CONFIG_READ_ONLY_THP_FOR_FS
-	if (!mapping_thp_support(mapping))
+	if (!mapping_large_folio_support(mapping))
 		atomic_dec(&mapping->nr_thps);
 #else
 	WARN_ON_ONCE(1);
-- 
cgit v1.2.3


From 75082e7f46809432131749f4ecea66864d0f7438 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 17 Nov 2021 02:01:30 -0800
Subject: net: add missing include in include/net/gro.h

This is needed for some arches, as reported by Geert Uytterhoeven,
Randy Dunlap and Stephen Rothwell

Fixes: 4721031c3559 ("net: move gro definitions to include/net/gro.h")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20211117100130.2368319-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/gro.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/gro.h b/include/net/gro.h
index d0e7df691a80..9c22a010369c 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -6,6 +6,7 @@
 #include <linux/indirect_call_wrapper.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <net/ip6_checksum.h>
 #include <linux/skbuff.h>
 #include <net/udp.h>
 
-- 
cgit v1.2.3


From 5768d8906bc23d512b1a736c1e198aa833a6daa4 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 15 Nov 2021 13:47:13 -0600
Subject: signal: Requeue signals in the appropriate queue

In the event that a tracer changes which signal needs to be delivered
and that signal is currently blocked then the signal needs to be
requeued for later delivery.

With the advent of CLONE_THREAD the kernel has 2 signal queues per
task.  The per process queue and the per task queue.  Update the code
so that if the signal is removed from the per process queue it is
requeued on the per process queue.  This is necessary to make it
appear the signal was never dequeued.

The rr debugger reasonably believes that the state of the process from
the last ptrace_stop it observed until PTRACE_EVENT_EXIT can be recreated
by simply letting a process run.  If a SIGKILL interrupts a ptrace_stop
this is not true today.

So return signals to their original queue in ptrace_signal so that
signals that are not delivered appear like they were never dequeued.

Fixes: 794aa320b79d ("[PATCH] sigfix-2.5.40-D6")
History Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.gi
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lkml.kernel.org/r/87zgq4d5r4.fsf_-_@email.froward.int.ebiederm.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 23505394ef70..167995d471da 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -286,17 +286,18 @@ static inline int signal_group_exit(const struct signal_struct *sig)
 extern void flush_signals(struct task_struct *);
 extern void ignore_signals(struct task_struct *);
 extern void flush_signal_handlers(struct task_struct *, int force_default);
-extern int dequeue_signal(struct task_struct *task,
-			  sigset_t *mask, kernel_siginfo_t *info);
+extern int dequeue_signal(struct task_struct *task, sigset_t *mask,
+			  kernel_siginfo_t *info, enum pid_type *type);
 
 static inline int kernel_dequeue_signal(void)
 {
 	struct task_struct *task = current;
 	kernel_siginfo_t __info;
+	enum pid_type __type;
 	int ret;
 
 	spin_lock_irq(&task->sighand->siglock);
-	ret = dequeue_signal(task, &task->blocked, &__info);
+	ret = dequeue_signal(task, &task->blocked, &__info, &__type);
 	spin_unlock_irq(&task->sighand->siglock);
 
 	return ret;
-- 
cgit v1.2.3


From e0dbd7b0ed021fb9250f7ba4d759325678efefb5 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 16 Nov 2021 23:44:28 +0100
Subject: power: supply: core: Add kerneldoc to battery struct

This complements the struct power_supply_battery_info with
extensive kerneldoc explaining the different semantics of the
fields, including an overview of the CC/CV charging concepts
implicit in some of the struct members.

This is done to first establish semantics before I can
add more charging methods by breaking out the CC/CV parameters
to its own struct.

Tested-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 include/linux/power_supply.h | 215 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 192 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 2d1318fe2455..f6e94eae4f28 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -343,37 +343,206 @@ struct power_supply_resistance_temp_table {
 
 #define POWER_SUPPLY_OCV_TEMP_MAX 20
 
-/*
+/**
+ * struct power_supply_battery_info - information about batteries
+ * @technology: from the POWER_SUPPLY_TECHNOLOGY_* enum
+ * @energy_full_design_uwh: energy content when fully charged in microwatt
+ *   hours
+ * @charge_full_design_uah: charge content when fully charged in microampere
+ *   hours
+ * @voltage_min_design_uv: minimum voltage across the poles when the battery
+ *   is at minimum voltage level in microvolts. If the voltage drops below this
+ *   level the battery will need precharging when using CC/CV charging.
+ * @voltage_max_design_uv: voltage across the poles when the battery is fully
+ *   charged in microvolts. This is the "nominal voltage" i.e. the voltage
+ *   printed on the label of the battery.
+ * @tricklecharge_current_ua: the tricklecharge current used when trickle
+ *   charging the battery in microamperes. This is the charging phase when the
+ *   battery is completely empty and we need to carefully trickle in some
+ *   charge until we reach the precharging voltage.
+ * @precharge_current_ua: current to use in the precharge phase in microamperes,
+ *   the precharge rate is limited by limiting the current to this value.
+ * @precharge_voltage_max_uv: the maximum voltage allowed when precharging in
+ *   microvolts. When we pass this voltage we will nominally switch over to the
+ *   CC (constant current) charging phase defined by constant_charge_current_ua
+ *   and constant_charge_voltage_max_uv.
+ * @charge_term_current_ua: when the current in the CV (constant voltage)
+ *   charging phase drops below this value in microamperes the charging will
+ *   terminate completely and not restart until the voltage over the battery
+ *   poles reach charge_restart_voltage_uv unless we use maintenance charging.
+ * @charge_restart_voltage_uv: when the battery has been fully charged by
+ *   CC/CV charging and charging has been disabled, and the voltage subsequently
+ *   drops below this value in microvolts, the charging will be restarted
+ *   (typically using CV charging).
+ * @overvoltage_limit_uv: If the voltage exceeds the nominal voltage
+ *   voltage_max_design_uv and we reach this voltage level, all charging must
+ *   stop and emergency procedures take place, such as shutting down the system
+ *   in some cases.
+ * @constant_charge_current_max_ua: current in microamperes to use in the CC
+ *   (constant current) charging phase. The charging rate is limited
+ *   by this current. This is the main charging phase and as the current is
+ *   constant into the battery the voltage slowly ascends to
+ *   constant_charge_voltage_max_uv.
+ * @constant_charge_voltage_max_uv: voltage in microvolts signifying the end of
+ *   the CC (constant current) charging phase and the beginning of the CV
+ *   (constant voltage) charging phase.
+ * @factory_internal_resistance_uohm: the internal resistance of the battery
+ *   at fabrication time, expressed in microohms. This resistance will vary
+ *   depending on the lifetime and charge of the battery, so this is just a
+ *   nominal ballpark figure.
+ * @ocv_temp: array indicating the open circuit voltage (OCV) capacity
+ *   temperature indices. This is an array of temperatures in degrees Celsius
+ *   indicating which capacity table to use for a certain temperature, since
+ *   the capacity for reasons of chemistry will be different at different
+ *   temperatures. Determining capacity is a multivariate problem and the
+ *   temperature is the first variable we determine.
+ * @temp_ambient_alert_min: the battery will go outside of operating conditions
+ *   when the ambient temperature goes below this temperature in degrees
+ *   Celsius.
+ * @temp_ambient_alert_max: the battery will go outside of operating conditions
+ *   when the ambient temperature goes above this temperature in degrees
+ *   Celsius.
+ * @temp_alert_min: the battery should issue an alert if the internal
+ *   temperature goes below this temperature in degrees Celsius.
+ * @temp_alert_max: the battery should issue an alert if the internal
+ *   temperature goes above this temperature in degrees Celsius.
+ * @temp_min: the battery will go outside of operating conditions when
+ *   the internal temperature goes below this temperature in degrees Celsius.
+ *   Normally this means the system should shut down.
+ * @temp_max: the battery will go outside of operating conditions when
+ *   the internal temperature goes above this temperature in degrees Celsius.
+ *   Normally this means the system should shut down.
+ * @ocv_table: for each entry in ocv_temp there is a corresponding entry in
+ *   ocv_table and a size for each entry in ocv_table_size. These arrays
+ *   determine the capacity in percent in relation to the voltage in microvolts
+ *   at the indexed temperature.
+ * @ocv_table_size: for each entry in ocv_temp this array is giving the size of
+ *   each entry in the array of capacity arrays in ocv_table.
+ * @resist_table: this is a table that correlates a battery temperature to the
+ *   expected internal resistance at this temperature. The resistance is given
+ *   as a percentage of factory_internal_resistance_uohm. Knowing the
+ *   resistance of the battery is usually necessary for calculating the open
+ *   circuit voltage (OCV) that is then used with the ocv_table to calculate
+ *   the capacity of the battery. The resist_table must be ordered descending
+ *   by temperature: highest temperature with lowest resistance first, lowest
+ *   temperature with highest resistance last.
+ * @resist_table_size: the number of items in the resist_table.
+ *
  * This is the recommended struct to manage static battery parameters,
  * populated by power_supply_get_battery_info(). Most platform drivers should
  * use these for consistency.
+ *
  * Its field names must correspond to elements in enum power_supply_property.
  * The default field value is -EINVAL.
- * Power supply class itself doesn't use this.
+ *
+ * The charging parameters here assume a CC/CV charging scheme. This method
+ * is most common with Lithium Ion batteries (other methods are possible) and
+ * looks as follows:
+ *
+ * ^ Battery voltage
+ * |                                               --- overvoltage_limit_uv
+ * |
+ * |                    ...................................................
+ * |                 .. constant_charge_voltage_max_uv
+ * |              ..
+ * |             .
+ * |            .
+ * |           .
+ * |          .
+ * |         .
+ * |     .. precharge_voltage_max_uv
+ * |  ..
+ * |. (trickle charging)
+ * +------------------------------------------------------------------> time
+ *
+ * ^ Current into the battery
+ * |
+ * |      ............. constant_charge_current_max_ua
+ * |      .            .
+ * |      .             .
+ * |      .              .
+ * |      .               .
+ * |      .                ..
+ * |      .                  ....
+ * |      .                       .....
+ * |    ... precharge_current_ua       .......  charge_term_current_ua
+ * |    .                                    .
+ * |    .                                    .
+ * |.... tricklecharge_current_ua            .
+ * |                                         .
+ * +-----------------------------------------------------------------> time
+ *
+ * These diagrams are synchronized on time and the voltage and current
+ * follow each other.
+ *
+ * With CC/CV charging commence over time like this for an empty battery:
+ *
+ * 1. When the battery is completely empty it may need to be charged with
+ *    an especially small current so that electrons just "trickle in",
+ *    this is the tricklecharge_current_ua.
+ *
+ * 2. Next a small initial pre-charge current (precharge_current_ua)
+ *    is applied if the voltage is below precharge_voltage_max_uv until we
+ *    reach precharge_voltage_max_uv. CAUTION: in some texts this is referred
+ *    to as "trickle charging" but the use in the Linux kernel is different
+ *    see below!
+ *
+ * 3. Then the main charging current is applied, which is called the constant
+ *    current (CC) phase. A current regulator is set up to allow
+ *    constant_charge_current_max_ua of current to flow into the battery.
+ *    The chemical reaction in the battery will make the voltage go up as
+ *    charge goes into the battery. This current is applied until we reach
+ *    the constant_charge_voltage_max_uv voltage.
+ *
+ * 4. At this voltage we switch over to the constant voltage (CV) phase. This
+ *    means we allow current to go into the battery, but we keep the voltage
+ *    fixed. This current will continue to charge the battery while keeping
+ *    the voltage the same. A chemical reaction in the battery goes on
+ *    storing energy without affecting the voltage. Over time the current
+ *    will slowly drop and when we reach charge_term_current_ua we will
+ *    end the constant voltage phase.
+ *
+ * After this the battery is fully charged, and if we do not support maintenance
+ * charging, the charging will not restart until power dissipation makes the
+ * voltage fall so that we reach charge_restart_voltage_uv and at this point
+ * we restart charging at the appropriate phase, usually this will be inside
+ * the CV phase.
+ *
+ * If we support maintenance charging the voltage is however kept high after
+ * the CV phase with a very low current. This is meant to let the same charge
+ * go in for usage while the charger is still connected, mainly for
+ * dissipation for the power consuming entity while connected to the
+ * charger.
+ *
+ * All charging MUST terminate if the overvoltage_limit_uv is ever reached.
+ * Overcharging Lithium Ion cells can be DANGEROUS and lead to fire or
+ * explosions.
+ *
+ * The power supply class itself doesn't use this struct as of now.
  */
 
 struct power_supply_battery_info {
-	unsigned int technology;	    /* from the enum above */
-	int energy_full_design_uwh;	    /* microWatt-hours */
-	int charge_full_design_uah;	    /* microAmp-hours */
-	int voltage_min_design_uv;	    /* microVolts */
-	int voltage_max_design_uv;	    /* microVolts */
-	int tricklecharge_current_ua;	    /* microAmps */
-	int precharge_current_ua;	    /* microAmps */
-	int precharge_voltage_max_uv;	    /* microVolts */
-	int charge_term_current_ua;	    /* microAmps */
-	int charge_restart_voltage_uv;	    /* microVolts */
-	int overvoltage_limit_uv;	    /* microVolts */
-	int constant_charge_current_max_ua; /* microAmps */
-	int constant_charge_voltage_max_uv; /* microVolts */
-	int factory_internal_resistance_uohm;   /* microOhms */
-	int ocv_temp[POWER_SUPPLY_OCV_TEMP_MAX];/* celsius */
-	int temp_ambient_alert_min;             /* celsius */
-	int temp_ambient_alert_max;             /* celsius */
-	int temp_alert_min;                     /* celsius */
-	int temp_alert_max;                     /* celsius */
-	int temp_min;                           /* celsius */
-	int temp_max;                           /* celsius */
+	unsigned int technology;
+	int energy_full_design_uwh;
+	int charge_full_design_uah;
+	int voltage_min_design_uv;
+	int voltage_max_design_uv;
+	int tricklecharge_current_ua;
+	int precharge_current_ua;
+	int precharge_voltage_max_uv;
+	int charge_term_current_ua;
+	int charge_restart_voltage_uv;
+	int overvoltage_limit_uv;
+	int constant_charge_current_max_ua;
+	int constant_charge_voltage_max_uv;
+	int factory_internal_resistance_uohm;
+	int ocv_temp[POWER_SUPPLY_OCV_TEMP_MAX];
+	int temp_ambient_alert_min;
+	int temp_ambient_alert_max;
+	int temp_alert_min;
+	int temp_alert_max;
+	int temp_min;
+	int temp_max;
 	struct power_supply_battery_ocv_table *ocv_table[POWER_SUPPLY_OCV_TEMP_MAX];
 	int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX];
 	struct power_supply_resistance_temp_table *resist_table;
-- 
cgit v1.2.3


From efb931cdc4b94a0f7ed17a76844f08cef1bdffe5 Mon Sep 17 00:00:00 2001
From: Ajit Kumar Pandey <AjitKumar.Pandey@amd.com>
Date: Wed, 17 Nov 2021 11:37:24 +0200
Subject: ASoC: SOF: topology: Add support for AMD ACP DAIs

Add new sof dais and config to pass topology file configuration
to SOF firmware running on ACP's DSP core. ACP firmware support
I2S_BT, I2S_SP and DMIC controller hence add three new dais to
the list of supported sof_dais

Signed-off-by: Ajit Kumar Pandey <AjitKumar.Pandey@amd.com>
Reviewed-by: Bard Liao <bard.liao@intel.com>
Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Signed-off-by: Daniel Baluta <daniel.baluta@nxp.com>
Link: https://lore.kernel.org/r/20211117093734.17407-12-daniel.baluta@oss.nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sof/dai-amd.h | 21 +++++++++++++++++++++
 include/sound/sof/dai.h     |  7 +++++++
 2 files changed, 28 insertions(+)
 create mode 100644 include/sound/sof/dai-amd.h

(limited to 'include')

diff --git a/include/sound/sof/dai-amd.h b/include/sound/sof/dai-amd.h
new file mode 100644
index 000000000000..90d09dbdd709
--- /dev/null
+++ b/include/sound/sof/dai-amd.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+/*
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * Copyright(c) 2021 Advanced Micro Devices, Inc.. All rights reserved.
+ */
+
+#ifndef __INCLUDE_SOUND_SOF_DAI_AMD_H__
+#define __INCLUDE_SOUND_SOF_DAI_AMD_H__
+
+#include <sound/sof/header.h>
+
+/* ACP Configuration Request - SOF_IPC_DAI_AMD_CONFIG */
+struct sof_ipc_dai_acp_params {
+	struct sof_ipc_hdr hdr;
+
+	uint32_t fsync_rate;    /* FSYNC frequency in Hz */
+	uint32_t tdm_slots;
+} __packed;
+#endif
diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h
index 9625f47557b8..3782127a7095 100644
--- a/include/sound/sof/dai.h
+++ b/include/sound/sof/dai.h
@@ -12,6 +12,7 @@
 #include <sound/sof/header.h>
 #include <sound/sof/dai-intel.h>
 #include <sound/sof/dai-imx.h>
+#include <sound/sof/dai-amd.h>
 
 /*
  * DAI Configuration.
@@ -66,6 +67,9 @@ enum sof_ipc_dai_type {
 	SOF_DAI_INTEL_ALH,		/**< Intel ALH  */
 	SOF_DAI_IMX_SAI,		/**< i.MX SAI */
 	SOF_DAI_IMX_ESAI,		/**< i.MX ESAI */
+	SOF_DAI_AMD_BT,			/**< AMD ACP BT*/
+	SOF_DAI_AMD_SP,			/**< AMD ACP SP */
+	SOF_DAI_AMD_DMIC,		/**< AMD ACP DMIC */
 };
 
 /* general purpose DAI configuration */
@@ -90,6 +94,9 @@ struct sof_ipc_dai_config {
 		struct sof_ipc_dai_alh_params alh;
 		struct sof_ipc_dai_esai_params esai;
 		struct sof_ipc_dai_sai_params sai;
+		struct sof_ipc_dai_acp_params acpbt;
+		struct sof_ipc_dai_acp_params acpsp;
+		struct sof_ipc_dai_acp_params acpdmic;
 	};
 } __packed;
 
-- 
cgit v1.2.3


From 6bb835f3d00467c9a5e35f4955afa29df96a404e Mon Sep 17 00:00:00 2001
From: Andriy Tryshnivskyy <andriy.tryshnivskyy@opensynergy.com>
Date: Sun, 24 Oct 2021 12:16:26 +0300
Subject: iio: core: Introduce IIO_VAL_INT_64.

Introduce IIO_VAL_INT_64 to read 64-bit value for
channel attribute. Val is used as lower 32 bits.

Signed-off-by: Andriy Tryshnivskyy <andriy.tryshnivskyy@opensynergy.com>
Link: https://lore.kernel.org/r/20211024091627.28031-2-andriy.tryshnivskyy@opensynergy.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/types.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h
index 84b3f8175cc6..a7aa91f3a8dc 100644
--- a/include/linux/iio/types.h
+++ b/include/linux/iio/types.h
@@ -24,6 +24,7 @@ enum iio_event_info {
 #define IIO_VAL_INT_PLUS_NANO 3
 #define IIO_VAL_INT_PLUS_MICRO_DB 4
 #define IIO_VAL_INT_MULTIPLE 5
+#define IIO_VAL_INT_64 6 /* 64-bit data, val is lower 32 bits */
 #define IIO_VAL_FRACTIONAL 10
 #define IIO_VAL_FRACTIONAL_LOG2 11
 #define IIO_VAL_CHAR 12
-- 
cgit v1.2.3


From b5f57384805a34f497edb8b04d694a8a1b3d81d4 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Fri, 10 Sep 2021 15:18:20 -0400
Subject: drm/amdkfd: Add sysfs bitfields and enums to uAPI

These bits are de-facto part of the uAPI, so declare them in a uAPI header.

The corresponding bit-fields and enums in user mode are defined in
https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/blob/master/include/hsakmttypes.h

HSA_CAP_...           -> HSA_CAPABILITY
HSA_MEM_HEAP_TYPE_... -> HSA_HEAPTYPE
HSA_MEM_FLAGS_...     -> HSA_MEMORYPROPERTY
HSA_CACHE_TYPE_...    -> HsaCacheType
HSA_IOLINK_TYPE_...   -> HSA_IOLINKTYPE
HSA_IOLINK_FLAGS_...  -> HSA_LINKPROPERTY

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Jonathan Kim <jonathan.kim@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/linux/kfd_sysfs.h | 108 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100644 include/uapi/linux/kfd_sysfs.h

(limited to 'include')

diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h
new file mode 100644
index 000000000000..e1fb78b4bf09
--- /dev/null
+++ b/include/uapi/linux/kfd_sysfs.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT WITH Linux-syscall-note */
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KFD_SYSFS_H_INCLUDED
+#define KFD_SYSFS_H_INCLUDED
+
+/* Capability bits in node properties */
+#define HSA_CAP_HOT_PLUGGABLE			0x00000001
+#define HSA_CAP_ATS_PRESENT			0x00000002
+#define HSA_CAP_SHARED_WITH_GRAPHICS		0x00000004
+#define HSA_CAP_QUEUE_SIZE_POW2			0x00000008
+#define HSA_CAP_QUEUE_SIZE_32BIT		0x00000010
+#define HSA_CAP_QUEUE_IDLE_EVENT		0x00000020
+#define HSA_CAP_VA_LIMIT			0x00000040
+#define HSA_CAP_WATCH_POINTS_SUPPORTED		0x00000080
+#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK	0x00000f00
+#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT	8
+#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK	0x00003000
+#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT	12
+
+#define HSA_CAP_DOORBELL_TYPE_PRE_1_0		0x0
+#define HSA_CAP_DOORBELL_TYPE_1_0		0x1
+#define HSA_CAP_DOORBELL_TYPE_2_0		0x2
+#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP		0x00004000
+
+/* Old buggy user mode depends on this being 0 */
+#define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED	0x00080000
+
+#define HSA_CAP_MEM_EDCSUPPORTED		0x00100000
+#define HSA_CAP_RASEVENTNOTIFY			0x00200000
+#define HSA_CAP_ASIC_REVISION_MASK		0x03c00000
+#define HSA_CAP_ASIC_REVISION_SHIFT		22
+#define HSA_CAP_SRAM_EDCSUPPORTED		0x04000000
+#define HSA_CAP_SVMAPI_SUPPORTED		0x08000000
+#define HSA_CAP_FLAGS_COHERENTHOSTACCESS	0x10000000
+#define HSA_CAP_RESERVED			0xe00f8000
+
+/* Heap types in memory properties */
+#define HSA_MEM_HEAP_TYPE_SYSTEM	0
+#define HSA_MEM_HEAP_TYPE_FB_PUBLIC	1
+#define HSA_MEM_HEAP_TYPE_FB_PRIVATE	2
+#define HSA_MEM_HEAP_TYPE_GPU_GDS	3
+#define HSA_MEM_HEAP_TYPE_GPU_LDS	4
+#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH	5
+
+/* Flag bits in memory properties */
+#define HSA_MEM_FLAGS_HOT_PLUGGABLE		0x00000001
+#define HSA_MEM_FLAGS_NON_VOLATILE		0x00000002
+#define HSA_MEM_FLAGS_RESERVED			0xfffffffc
+
+/* Cache types in cache properties */
+#define HSA_CACHE_TYPE_DATA		0x00000001
+#define HSA_CACHE_TYPE_INSTRUCTION	0x00000002
+#define HSA_CACHE_TYPE_CPU		0x00000004
+#define HSA_CACHE_TYPE_HSACU		0x00000008
+#define HSA_CACHE_TYPE_RESERVED		0xfffffff0
+
+/* Link types in IO link properties (matches CRAT link types) */
+#define HSA_IOLINK_TYPE_UNDEFINED	0
+#define HSA_IOLINK_TYPE_HYPERTRANSPORT	1
+#define HSA_IOLINK_TYPE_PCIEXPRESS	2
+#define HSA_IOLINK_TYPE_AMBA		3
+#define HSA_IOLINK_TYPE_MIPI		4
+#define HSA_IOLINK_TYPE_QPI_1_1	5
+#define HSA_IOLINK_TYPE_RESERVED1	6
+#define HSA_IOLINK_TYPE_RESERVED2	7
+#define HSA_IOLINK_TYPE_RAPID_IO	8
+#define HSA_IOLINK_TYPE_INFINIBAND	9
+#define HSA_IOLINK_TYPE_RESERVED3	10
+#define HSA_IOLINK_TYPE_XGMI		11
+#define HSA_IOLINK_TYPE_XGOP		12
+#define HSA_IOLINK_TYPE_GZ		13
+#define HSA_IOLINK_TYPE_ETHERNET_RDMA	14
+#define HSA_IOLINK_TYPE_RDMA_OTHER	15
+#define HSA_IOLINK_TYPE_OTHER		16
+
+/* Flag bits in IO link properties (matches CRAT flags, excluding the
+ * bi-directional flag, which is not offially part of the CRAT spec, and
+ * only used internally in KFD)
+ */
+#define HSA_IOLINK_FLAGS_ENABLED		(1 << 0)
+#define HSA_IOLINK_FLAGS_NON_COHERENT		(1 << 1)
+#define HSA_IOLINK_FLAGS_NO_ATOMICS_32_BIT	(1 << 2)
+#define HSA_IOLINK_FLAGS_NO_ATOMICS_64_BIT	(1 << 3)
+#define HSA_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA	(1 << 4)
+#define HSA_IOLINK_FLAGS_RESERVED		0xffffffe0
+
+#endif
-- 
cgit v1.2.3


From 2925748eadc33cba3bded7b69475a1b002b124ac Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Wed, 17 Nov 2021 13:22:53 +0000
Subject: firmware: cs_dsp: Add version checks on coefficient loading

The firmware coefficient files contain version information that is
currently ignored by the cs_dsp code. This information specifies which
version of the firmware the coefficient were generated for. Add a check
into the code which prints a warning in the case the coefficient and
firmware differ in version, in many cases this will be ok but it is not
always, so best to let the user know there is a potential issue.

Co-authored-by: Simon Trimmer <simont@opensource.cirrus.com>
Signed-off-by: Simon Trimmer <simont@opensource.cirrus.com>
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20211117132300.1290-3-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/firmware/cirrus/cs_dsp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h
index 3a54b1afc48f..ce54705e2bec 100644
--- a/include/linux/firmware/cirrus/cs_dsp.h
+++ b/include/linux/firmware/cirrus/cs_dsp.h
@@ -54,12 +54,14 @@ struct cs_dsp_region {
  * struct cs_dsp_alg_region - Describes a logical algorithm region in DSP address space
  * @list:	List node for internal use
  * @alg:	Algorithm id
+ * @ver:	Expected algorithm version
  * @type:	Memory region type
  * @base:	Address of region
  */
 struct cs_dsp_alg_region {
 	struct list_head list;
 	unsigned int alg;
+	unsigned int ver;
 	int type;
 	unsigned int base;
 };
-- 
cgit v1.2.3


From 14055b5a3a23204c4702ae5d3f2a819ee081ce33 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Wed, 17 Nov 2021 13:22:54 +0000
Subject: firmware: cs_dsp: Add pre_run callback

The code already has a post_run callback, add a matching pre_run
callback to the client_ops that is called before execution is started.
This callback provides a convenient place for the client code to
set DSP controls or hardware that requires configuration before
the DSP core actually starts execution. Note that placing this callback
before cs_dsp_coeff_sync_controls is important to ensure that any
control values are then correctly synced out to the chip.

Co-authored-by: Simon Trimmer <simont@opensource.cirrus.com>
Signed-off-by: Simon Trimmer <simont@opensource.cirrus.com>
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20211117132300.1290-4-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/firmware/cirrus/cs_dsp.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h
index ce54705e2bec..0bf849baeaa5 100644
--- a/include/linux/firmware/cirrus/cs_dsp.h
+++ b/include/linux/firmware/cirrus/cs_dsp.h
@@ -187,7 +187,8 @@ struct cs_dsp {
  * struct cs_dsp_client_ops - client callbacks
  * @control_add:	Called under the pwr_lock when a control is created
  * @control_remove:	Called under the pwr_lock when a control is destroyed
- * @post_run:		Called under the pwr_lock by cs_dsp_run()
+ * @pre_run:		Called under the pwr_lock by cs_dsp_run() before the core is started
+ * @post_run:		Called under the pwr_lock by cs_dsp_run() after the core is started
  * @post_stop:		Called under the pwr_lock by cs_dsp_stop()
  * @watchdog_expired:	Called when a watchdog expiry is detected
  *
@@ -197,6 +198,7 @@ struct cs_dsp {
 struct cs_dsp_client_ops {
 	int (*control_add)(struct cs_dsp_coeff_ctl *ctl);
 	void (*control_remove)(struct cs_dsp_coeff_ctl *ctl);
+	int (*pre_run)(struct cs_dsp *dsp);
 	int (*post_run)(struct cs_dsp *dsp);
 	void (*post_stop)(struct cs_dsp *dsp);
 	void (*watchdog_expired)(struct cs_dsp *dsp);
-- 
cgit v1.2.3


From b329b3d39497a9fdb175d7e4fd77ae7170d5d26c Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Wed, 17 Nov 2021 13:22:58 +0000
Subject: firmware: cs_dsp: Clarify some kernel doc comments

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20211117132300.1290-8-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/firmware/cirrus/cs_dsp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h
index 0bf849baeaa5..1ad1b173417a 100644
--- a/include/linux/firmware/cirrus/cs_dsp.h
+++ b/include/linux/firmware/cirrus/cs_dsp.h
@@ -76,8 +76,8 @@ struct cs_dsp_alg_region {
  * @enabled:		Flag indicating whether control is enabled
  * @list:		List node for internal use
  * @cache:		Cached value of the control
- * @offset:		Offset of control within alg_region
- * @len:		Length of the cached value
+ * @offset:		Offset of control within alg_region in words
+ * @len:		Length of the cached value in bytes
  * @set:		Flag indicating the value has been written by the user
  * @flags:		Bitfield of WMFW_CTL_FLAG_ control flags defined in wmfw.h
  * @type:		One of the WMFW_CTL_TYPE_ control types defined in wmfw.h
-- 
cgit v1.2.3


From f444da38ac924748de696c393327a44c4b8d727e Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Wed, 17 Nov 2021 13:22:59 +0000
Subject: firmware: cs_dsp: Add offset to cs_dsp read/write

Provide a mechanism to access only part of a control through the cs_dsp
interface.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20211117132300.1290-9-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/firmware/cirrus/cs_dsp.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h
index 1ad1b173417a..38b4da3ddfe4 100644
--- a/include/linux/firmware/cirrus/cs_dsp.h
+++ b/include/linux/firmware/cirrus/cs_dsp.h
@@ -232,8 +232,10 @@ void cs_dsp_init_debugfs(struct cs_dsp *dsp, struct dentry *debugfs_root);
 void cs_dsp_cleanup_debugfs(struct cs_dsp *dsp);
 
 int cs_dsp_coeff_write_acked_control(struct cs_dsp_coeff_ctl *ctl, unsigned int event_id);
-int cs_dsp_coeff_write_ctrl(struct cs_dsp_coeff_ctl *ctl, const void *buf, size_t len);
-int cs_dsp_coeff_read_ctrl(struct cs_dsp_coeff_ctl *ctl, void *buf, size_t len);
+int cs_dsp_coeff_write_ctrl(struct cs_dsp_coeff_ctl *ctl, unsigned int off,
+			    const void *buf, size_t len);
+int cs_dsp_coeff_read_ctrl(struct cs_dsp_coeff_ctl *ctl, unsigned int off,
+			   void *buf, size_t len);
 struct cs_dsp_coeff_ctl *cs_dsp_get_ctl(struct cs_dsp *dsp, const char *name, int type,
 					unsigned int alg);
 
-- 
cgit v1.2.3


From 5c903f64ce97172d63f7591cfa9e37cba58867b2 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Wed, 17 Nov 2021 13:23:00 +0000
Subject: firmware: cs_dsp: Allow creation of event controls

Some firmwares contain controls intended to convey firmware state back
to the host. Whilst more infrastructure will probably be needed for
these in time, as a first step allow creation of the controls, so said
firmwares arn't completely rejected.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20211117132300.1290-10-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/firmware/cirrus/wmfw.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/firmware/cirrus/wmfw.h b/include/linux/firmware/cirrus/wmfw.h
index a19bf7c6fc8b..74e5a4f6c13a 100644
--- a/include/linux/firmware/cirrus/wmfw.h
+++ b/include/linux/firmware/cirrus/wmfw.h
@@ -29,6 +29,7 @@
 #define WMFW_CTL_TYPE_ACKED       0x1000 /* acked control */
 #define WMFW_CTL_TYPE_HOSTEVENT   0x1001 /* event control */
 #define WMFW_CTL_TYPE_HOST_BUFFER 0x1002 /* host buffer pointer */
+#define WMFW_CTL_TYPE_FWEVENT     0x1004 /* firmware event control */
 
 struct wmfw_header {
 	char magic[4];
-- 
cgit v1.2.3


From f58a435311672305d8747f40e35235f7ed64ae69 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Fri, 5 Nov 2021 14:33:41 -0400
Subject: drm/dp, drm/i915: Add support for VESA backlights using PWM for
 brightness control

Now that we've added support to i915 for controlling panel backlights that
need PWM to be enabled/disabled, let's finalize this and add support for
controlling brightness levels via PWM as well. This should hopefully put us
towards the path of supporting _ALL_ backlights via VESA's DPCD interface
which would allow us to finally start trusting the DPCD again.

Note however that we still don't enable using this by default on i915 when
it's not needed, primarily because I haven't yet had a chance to confirm if
it's safe to do this on the one machine in Intel's CI that had an issue
with this: samus-fi-bdw. I have done basic testing of this on other
machines though, by manually patching i915 to force it into PWM-only mode
on some of my laptops.

v2:
* Correct documentation (thanks Doug!)
* Get rid of backlight caps

Signed-off-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Doug Anderson <dianders@chromium.org>
Cc: Rajeev Nandan <rajeevny@codeaurora.org>
Cc: Satadru Pramanik <satadru@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211105183342.130810-5-lyude@redhat.com
---
 include/drm/drm_dp_helper.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index afdf7f4183f9..8b2ed4199284 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -1868,7 +1868,7 @@ drm_dp_sink_can_do_video_without_timing_msa(const u8 dpcd[DP_RECEIVER_CAP_SIZE])
  *
  * Note that currently this function will return %false for panels which support various DPCD
  * backlight features but which require the brightness be set through PWM, and don't support setting
- * the brightness level via the DPCD. This is a TODO.
+ * the brightness level via the DPCD.
  *
  * Returns: %True if @edp_dpcd indicates that VESA backlight controls are supported, %false
  * otherwise
@@ -1876,8 +1876,7 @@ drm_dp_sink_can_do_video_without_timing_msa(const u8 dpcd[DP_RECEIVER_CAP_SIZE])
 static inline bool
 drm_edp_backlight_supported(const u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE])
 {
-	return (edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP) &&
-		(edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP);
+	return !!(edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP);
 }
 
 /*
@@ -2238,6 +2237,7 @@ drm_dp_has_quirk(const struct drm_dp_desc *desc, enum drm_dp_quirk quirk)
  * @max: The maximum backlight level that may be set
  * @lsb_reg_used: Do we also write values to the DP_EDP_BACKLIGHT_BRIGHTNESS_LSB register?
  * @aux_enable: Does the panel support the AUX enable cap?
+ * @aux_set: Does the panel support setting the brightness through AUX?
  *
  * This structure contains various data about an eDP backlight, which can be populated by using
  * drm_edp_backlight_init().
@@ -2249,6 +2249,7 @@ struct drm_edp_backlight_info {
 
 	bool lsb_reg_used : 1;
 	bool aux_enable : 1;
+	bool aux_set : 1;
 };
 
 int
-- 
cgit v1.2.3


From 268bb03856ed6c8511c31d08de0148782f50822f Mon Sep 17 00:00:00 2001
From: Thiago Rafael Becker <trbecker@gmail.com>
Date: Wed, 17 Nov 2021 10:26:30 -0300
Subject: sunrpc: fix header include guard in trace header

rpcgss.h include protection was protecting against the define for
rpcrdma.h.

Signed-off-by: Thiago Rafael Becker <trbecker@gmail.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/trace/events/rpcgss.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h
index 3ba63319af3c..c9048f3e471b 100644
--- a/include/trace/events/rpcgss.h
+++ b/include/trace/events/rpcgss.h
@@ -8,7 +8,7 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM rpcgss
 
-#if !defined(_TRACE_RPCRDMA_H) || defined(TRACE_HEADER_MULTI_READ)
+#if !defined(_TRACE_RPCGSS_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_RPCGSS_H
 
 #include <linux/tracepoint.h>
-- 
cgit v1.2.3


From 48b71a9e66c2eab60564b1b1c85f4928ed04e406 Mon Sep 17 00:00:00 2001
From: Lin Ma <linma@zju.edu.cn>
Date: Tue, 16 Nov 2021 23:27:32 +0800
Subject: NFC: add NCI_UNREG flag to eliminate the race

There are two sites that calls queue_work() after the
destroy_workqueue() and lead to possible UAF.

The first site is nci_send_cmd(), which can happen after the
nci_close_device as below

nfcmrvl_nci_unregister_dev   |  nfc_genl_dev_up
  nci_close_device           |
    flush_workqueue          |
    del_timer_sync           |
  nci_unregister_device      |    nfc_get_device
    destroy_workqueue        |    nfc_dev_up
    nfc_unregister_device    |      nci_dev_up
      device_del             |        nci_open_device
                             |          __nci_request
                             |            nci_send_cmd
                             |              queue_work !!!

Another site is nci_cmd_timer, awaked by the nci_cmd_work from the
nci_send_cmd.

  ...                        |  ...
  nci_unregister_device      |  queue_work
    destroy_workqueue        |
    nfc_unregister_device    |  ...
      device_del             |  nci_cmd_work
                             |  mod_timer
                             |  ...
                             |  nci_cmd_timer
                             |    queue_work !!!

For the above two UAF, the root cause is that the nfc_dev_up can race
between the nci_unregister_device routine. Therefore, this patch
introduce NCI_UNREG flag to easily eliminate the possible race. In
addition, the mutex_lock in nci_close_device can act as a barrier.

Signed-off-by: Lin Ma <linma@zju.edu.cn>
Fixes: 6a2968aaf50c ("NFC: basic NCI protocol implementation")
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20211116152732.19238-1-linma@zju.edu.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/nfc/nci_core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index a964daedc17b..ea8595651c38 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -30,6 +30,7 @@ enum nci_flag {
 	NCI_UP,
 	NCI_DATA_EXCHANGE,
 	NCI_DATA_EXCHANGE_TO,
+	NCI_UNREG,
 };
 
 /* NCI device states */
-- 
cgit v1.2.3


From 8ff978b8b222bc9d51dd109a46b51026336c95d8 Mon Sep 17 00:00:00 2001
From: Riccardo Paolo Bestetti <pbl@bestov.io>
Date: Wed, 17 Nov 2021 10:00:11 +0100
Subject: ipv4/raw: support binding to nonlocal addresses

Add support to inet v4 raw sockets for binding to nonlocal addresses
through the IP_FREEBIND and IP_TRANSPARENT socket options, as well as
the ipv4.ip_nonlocal_bind kernel parameter.

Add helper function to inet_sock.h to check for bind address validity on
the base of the address type and whether nonlocal address are enabled
for the socket via any of the sockopts/sysctl, deduplicating checks in
ipv4/ping.c, ipv4/af_inet.c, ipv6/af_inet6.c (for mapped v4->v6
addresses), and ipv4/raw.c.

Add test cases with IP[V6]_FREEBIND verifying that both v4 and v6 raw
sockets support binding to nonlocal addresses after the change. Add
necessary support for the test cases to nettest.

Signed-off-by: Riccardo Paolo Bestetti <pbl@bestov.io>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20211117090010.125393-1-pbl@bestov.io
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/inet_sock.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 9e1111f5915b..234d70ae5f4c 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -372,4 +372,16 @@ static inline bool inet_can_nonlocal_bind(struct net *net,
 		inet->freebind || inet->transparent;
 }
 
+static inline bool inet_addr_valid_or_nonlocal(struct net *net,
+					       struct inet_sock *inet,
+					       __be32 addr,
+					       int addr_type)
+{
+	return inet_can_nonlocal_bind(net, inet) ||
+		addr == htonl(INADDR_ANY) ||
+		addr_type == RTN_LOCAL ||
+		addr_type == RTN_MULTICAST ||
+		addr_type == RTN_BROADCAST;
+}
+
 #endif	/* _INET_SOCK_H */
-- 
cgit v1.2.3


From 357a18ad230f0867791b788d2b1d6f280f6f6e61 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Mon, 15 Nov 2021 16:50:27 +0000
Subject: KVM: Kill kvm_map_gfn() / kvm_unmap_gfn() and gfn_to_pfn_cache

In commit 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time /
preempted status") I removed the only user of these functions because
it was basically impossible to use them safely.

There are two stages to the GFN->PFN mapping; first through the KVM
memslots to a userspace HVA and then through the page tables to
translate that HVA to an underlying PFN. Invalidations of the former
were being handled correctly, but no attempt was made to use the MMU
notifiers to invalidate the cache when the HVA->GFN mapping changed.

As a prelude to reinventing the gfn_to_pfn_cache with more usable
semantics, rip it out entirely and untangle the implementation of
the unsafe kvm_vcpu_map()/kvm_vcpu_unmap() functions from it.

All current users of kvm_vcpu_map() also look broken right now, and
will be dealt with separately. They broadly fall into two classes:

* Those which map, access the data and immediately unmap. This is
  mostly gratuitous and could just as well use the existing user
  HVA, and could probably benefit from a gfn_to_hva_cache as they
  do so.

* Those which keep the mapping around for a longer time, perhaps
  even using the PFN directly from the guest. These will need to
  be converted to the new gfn_to_pfn_cache and then kvm_vcpu_map()
  can be removed too.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20211115165030.7422-8-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h  | 6 +-----
 include/linux/kvm_types.h | 7 -------
 2 files changed, 1 insertion(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 60a35d9fe259..eb625af4fc5e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -866,7 +866,7 @@ void kvm_release_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_accessed(kvm_pfn_t pfn);
 
-void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
+void kvm_release_pfn(kvm_pfn_t pfn, bool dirty);
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 			int len);
 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
@@ -942,12 +942,8 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
 kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
 kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
-int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
-		struct gfn_to_pfn_cache *cache, bool atomic);
 struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
 void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
-int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
-		  struct gfn_to_pfn_cache *cache, bool dirty, bool atomic);
 unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
 unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
 int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 2237abb93ccd..234eab059839 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -53,13 +53,6 @@ struct gfn_to_hva_cache {
 	struct kvm_memory_slot *memslot;
 };
 
-struct gfn_to_pfn_cache {
-	u64 generation;
-	gfn_t gfn;
-	kvm_pfn_t pfn;
-	bool dirty;
-};
-
 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
 /*
  * Memory caches are used to preallocate memory ahead of various MMU flows,
-- 
cgit v1.2.3


From f915b75bffb7257bd8d26376b8e1cc67771927f8 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 17 Nov 2021 15:56:52 +0800
Subject: page_pool: Revert "page_pool: disable dma mapping support..."

This reverts commit d00e60ee54b12de945b8493cf18c1ada9e422514.

As reported by Guillaume in [1]:
Enabling LPAE always enables CONFIG_ARCH_DMA_ADDR_T_64BIT
in 32-bit systems, which breaks the bootup proceess when a
ethernet driver is using page pool with PP_FLAG_DMA_MAP flag.
As we were hoping we had no active consumers for such system
when we removed the dma mapping support, and LPAE seems like
a common feature for 32 bits system, so revert it.

1. https://www.spinics.net/lists/netdev/msg779890.html

Fixes: d00e60ee54b1 ("page_pool: disable dma mapping support for 32-bit arch with 64-bit DMA")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Reported-by: "kernelci.org bot" <bot@kernelci.org>
Tested-by: "kernelci.org bot" <bot@kernelci.org>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mm_types.h | 13 ++++++++++++-
 include/net/page_pool.h  | 12 +++++++++++-
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index bb8c6f5f19bc..c3a6e6209600 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -105,7 +105,18 @@ struct page {
 			struct page_pool *pp;
 			unsigned long _pp_mapping_pad;
 			unsigned long dma_addr;
-			atomic_long_t pp_frag_count;
+			union {
+				/**
+				 * dma_addr_upper: might require a 64-bit
+				 * value on 32-bit architectures.
+				 */
+				unsigned long dma_addr_upper;
+				/**
+				 * For frag page support, not supported in
+				 * 32-bit architectures with 64-bit DMA.
+				 */
+				atomic_long_t pp_frag_count;
+			};
 		};
 		struct {	/* slab, slob and slub */
 			union {
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 3855f069627f..a4082406a003 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -216,14 +216,24 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
 	page_pool_put_full_page(pool, page, true);
 }
 
+#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT	\
+		(sizeof(dma_addr_t) > sizeof(unsigned long))
+
 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
-	return page->dma_addr;
+	dma_addr_t ret = page->dma_addr;
+
+	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+		ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
+
+	return ret;
 }
 
 static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
 {
 	page->dma_addr = addr;
+	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+		page->dma_addr_upper = upper_32_bits(addr);
 }
 
 static inline void page_pool_set_frag_count(struct page *page, long nr)
-- 
cgit v1.2.3


From df6160deb3debe6f964c16349f9431157ff67dda Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 17 Nov 2021 17:57:29 -0800
Subject: tcp: add missing htmldocs for skb->ll_node and sk->defer_list

Add missing entries to fix these "make htmldocs" warnings.

./include/linux/skbuff.h:953: warning: Function parameter or member 'll_node' not described in 'sk_buff'
./include/net/sock.h:540: warning: Function parameter or member 'defer_list' not described in 'sock'

Fixes: f35f821935d8 ("tcp: defer skb freeing after socket lock is released")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 1 +
 include/net/sock.h     | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b8b806512e16..100fd604fbc9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -627,6 +627,7 @@ typedef unsigned char *sk_buff_data_t;
  *		for retransmit timer
  *	@rbnode: RB tree node, alternative to next/prev for netem/tcp
  *	@list: queue head
+ *	@ll_node: anchor in an llist (eg socket defer_list)
  *	@sk: Socket we are owned by
  *	@ip_defrag_offset: (aka @sk) alternate use of @sk, used in
  *		fragmentation management
diff --git a/include/net/sock.h b/include/net/sock.h
index f09c0c4736c4..a79fc772324e 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -292,6 +292,7 @@ struct bpf_local_storage;
   *	@sk_pacing_shift: scaling factor for TCP Small Queues
   *	@sk_lingertime: %SO_LINGER l_linger setting
   *	@sk_backlog: always used with the per-socket spinlock held
+  *	@defer_list: head of llist storing skbs to be freed
   *	@sk_callback_lock: used with the callbacks in the end of this struct
   *	@sk_error_queue: rarely used
   *	@sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt,
-- 
cgit v1.2.3


From 0568c3bf3f34ad2f86e6b2dfaa0855aad9c1c562 Mon Sep 17 00:00:00 2001
From: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Date: Thu, 18 Nov 2021 18:11:57 +0800
Subject: net: mscc: ocelot: add MAC table stream learn and lookup operations

ocelot_mact_learn_streamdata() can be used in VSC9959 to overwrite an
FDB entry with stream data. The stream data includes SFID and SSID which
can be used for PSFP and FRER set.

ocelot_mact_lookup() can be used to check if the given {DMAC, VID} FDB
entry is exist, and also can retrieve the DEST_IDX and entry type for
the FDB entry.

Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index fef3a36b0210..1d5ff11e4100 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -593,6 +593,19 @@ enum ocelot_sb_pool {
 	OCELOT_SB_POOL_NUM,
 };
 
+/* MAC table entry types.
+ * ENTRYTYPE_NORMAL is subject to aging.
+ * ENTRYTYPE_LOCKED is not subject to aging.
+ * ENTRYTYPE_MACv4 is not subject to aging. For IPv4 multicast.
+ * ENTRYTYPE_MACv6 is not subject to aging. For IPv6 multicast.
+ */
+enum macaccess_entry_type {
+	ENTRYTYPE_NORMAL = 0,
+	ENTRYTYPE_LOCKED,
+	ENTRYTYPE_MACv4,
+	ENTRYTYPE_MACv6,
+};
+
 #define OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION	BIT(0)
 #define OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP		BIT(1)
 
@@ -870,6 +883,15 @@ void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port,
 				bool tx_pause, bool rx_pause,
 				unsigned long quirks);
 
+int ocelot_mact_lookup(struct ocelot *ocelot, int *dst_idx,
+		       const unsigned char mac[ETH_ALEN],
+		       unsigned int vid, enum macaccess_entry_type *type);
+int ocelot_mact_learn_streamdata(struct ocelot *ocelot, int dst_idx,
+				 const unsigned char mac[ETH_ALEN],
+				 unsigned int vid,
+				 enum macaccess_entry_type type,
+				 int sfid, int ssid);
+
 #if IS_ENABLED(CONFIG_BRIDGE_MRP)
 int ocelot_mrp_add(struct ocelot *ocelot, int port,
 		   const struct switchdev_obj_mrp *mrp);
-- 
cgit v1.2.3


From 23e2c506ad6c588b469e3d06cc20299434440d02 Mon Sep 17 00:00:00 2001
From: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Date: Thu, 18 Nov 2021 18:11:59 +0800
Subject: net: mscc: ocelot: add gate and police action offload to PSFP

PSFP support gate and police action. This patch add the gate and police
action to flower parse action, check chain ID to determine which block
to offload. Adding psfp callback functions to add, delete and update gate
and police in PSFP table if hardware supports it.

Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot.h      | 5 +++++
 include/soc/mscc/ocelot_vcap.h | 1 +
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 1d5ff11e4100..e9985ace59c0 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -555,6 +555,11 @@ struct ocelot_ops {
 	u16 (*wm_enc)(u16 value);
 	u16 (*wm_dec)(u16 value);
 	void (*wm_stat)(u32 val, u32 *inuse, u32 *maxuse);
+	void (*psfp_init)(struct ocelot *ocelot);
+	int (*psfp_filter_add)(struct ocelot *ocelot, struct flow_cls_offload *f);
+	int (*psfp_filter_del)(struct ocelot *ocelot, struct flow_cls_offload *f);
+	int (*psfp_stats_get)(struct ocelot *ocelot, struct flow_cls_offload *f,
+			      struct flow_stats *stats);
 };
 
 struct ocelot_vcap_block {
diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h
index eeb1142aa1b1..9cca2f8e61a2 100644
--- a/include/soc/mscc/ocelot_vcap.h
+++ b/include/soc/mscc/ocelot_vcap.h
@@ -656,6 +656,7 @@ enum ocelot_vcap_filter_type {
 	OCELOT_VCAP_FILTER_DUMMY,
 	OCELOT_VCAP_FILTER_PAG,
 	OCELOT_VCAP_FILTER_OFFLOAD,
+	OCELOT_PSFP_FILTER_OFFLOAD,
 };
 
 struct ocelot_vcap_id {
-- 
cgit v1.2.3


From 7d4b564d6adde3167dd015f7dbb7aee1d7a4294e Mon Sep 17 00:00:00 2001
From: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Date: Thu, 18 Nov 2021 18:12:00 +0800
Subject: net: dsa: felix: support psfp filter on vsc9959

VSC9959 supports Per-Stream Filtering and Policing(PSFP) that complies
with the IEEE 802.1Qci standard. The stream is identified by Null stream
identification(DMAC and VLAN ID) defined in IEEE802.1CB.

For PSFP, four tables need to be set up: stream table, stream filter
table, stream gate table, and flow meter table. Identify the stream by
parsing the tc flower keys and add it to the stream table. The stream
filter table is automatically maintained, and its index is determined by
SGID(flow gate index) and FMID(flow meter index).

Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot.h     |  8 ++++++++
 include/soc/mscc/ocelot_ana.h | 10 ++++++++++
 2 files changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index e9985ace59c0..5ea72d274d7f 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -586,6 +586,12 @@ enum ocelot_port_tag_config {
 	OCELOT_PORT_TAG_TRUNK = 3,
 };
 
+struct ocelot_psfp_list {
+	struct list_head stream_list;
+	struct list_head sfi_list;
+	struct list_head sgi_list;
+};
+
 enum ocelot_sb {
 	OCELOT_SB_BUF,
 	OCELOT_SB_REF,
@@ -687,6 +693,8 @@ struct ocelot {
 	struct ocelot_vcap_block	block[3];
 	struct vcap_props		*vcap;
 
+	struct ocelot_psfp_list		psfp;
+
 	/* Workqueue to check statistics for overflow with its lock */
 	struct mutex			stats_lock;
 	u64				*stats;
diff --git a/include/soc/mscc/ocelot_ana.h b/include/soc/mscc/ocelot_ana.h
index 1669481d9779..67e0ae05a5ab 100644
--- a/include/soc/mscc/ocelot_ana.h
+++ b/include/soc/mscc/ocelot_ana.h
@@ -227,6 +227,11 @@
 #define ANA_TABLES_SFIDACCESS_SFID_TBL_CMD(x)             ((x) & GENMASK(1, 0))
 #define ANA_TABLES_SFIDACCESS_SFID_TBL_CMD_M              GENMASK(1, 0)
 
+#define SFIDACCESS_CMD_IDLE                               0
+#define SFIDACCESS_CMD_READ                               1
+#define SFIDACCESS_CMD_WRITE                              2
+#define SFIDACCESS_CMD_INIT                               3
+
 #define ANA_TABLES_SFIDTIDX_SGID_VALID                    BIT(26)
 #define ANA_TABLES_SFIDTIDX_SGID(x)                       (((x) << 18) & GENMASK(25, 18))
 #define ANA_TABLES_SFIDTIDX_SGID_M                        GENMASK(25, 18)
@@ -255,6 +260,11 @@
 #define ANA_SG_CONFIG_REG_3_INIT_IPS(x)                   (((x) << 21) & GENMASK(24, 21))
 #define ANA_SG_CONFIG_REG_3_INIT_IPS_M                    GENMASK(24, 21)
 #define ANA_SG_CONFIG_REG_3_INIT_IPS_X(x)                 (((x) & GENMASK(24, 21)) >> 21)
+#define ANA_SG_CONFIG_REG_3_IPV_VALID                     BIT(24)
+#define ANA_SG_CONFIG_REG_3_IPV_INVALID(x)                (((x) << 24) & GENMASK(24, 24))
+#define ANA_SG_CONFIG_REG_3_INIT_IPV(x)                   (((x) << 21) & GENMASK(23, 21))
+#define ANA_SG_CONFIG_REG_3_INIT_IPV_M                    GENMASK(23, 21)
+#define ANA_SG_CONFIG_REG_3_INIT_IPV_X(x)                 (((x) & GENMASK(23, 21)) >> 21)
 #define ANA_SG_CONFIG_REG_3_INIT_GATE_STATE               BIT(25)
 
 #define ANA_SG_GCL_GS_CONFIG_RSZ                          0x4
-- 
cgit v1.2.3


From 77043c37096d4753b9f40e51445f31eb9dc40295 Mon Sep 17 00:00:00 2001
From: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Date: Thu, 18 Nov 2021 18:12:02 +0800
Subject: net: mscc: ocelot: use index to set vcap policer

Policer was previously automatically assigned from the highest index to
the lowest index from policer pool. But police action of tc flower now
uses index to set an police entry. This patch uses the police index to
set vcap policers, so that one policer can be shared by multiple rules.

Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 5ea72d274d7f..2a41685b5c7d 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -562,10 +562,17 @@ struct ocelot_ops {
 			      struct flow_stats *stats);
 };
 
+struct ocelot_vcap_policer {
+	struct list_head pol_list;
+	u16 base;
+	u16 max;
+	u16 base2;
+	u16 max2;
+};
+
 struct ocelot_vcap_block {
 	struct list_head rules;
 	int count;
-	int pol_lpr;
 };
 
 struct ocelot_bridge_vlan {
@@ -691,6 +698,7 @@ struct ocelot {
 
 	struct list_head		dummy_rules;
 	struct ocelot_vcap_block	block[3];
+	struct ocelot_vcap_policer	vcap_pol;
 	struct vcap_props		*vcap;
 
 	struct ocelot_psfp_list		psfp;
@@ -905,6 +913,10 @@ int ocelot_mact_learn_streamdata(struct ocelot *ocelot, int dst_idx,
 				 enum macaccess_entry_type type,
 				 int sfid, int ssid);
 
+int ocelot_vcap_policer_add(struct ocelot *ocelot, u32 pol_ix,
+			    struct ocelot_policer *pol);
+int ocelot_vcap_policer_del(struct ocelot *ocelot, u32 pol_ix);
+
 #if IS_ENABLED(CONFIG_BRIDGE_MRP)
 int ocelot_mrp_add(struct ocelot *ocelot, int port,
 		   const struct switchdev_obj_mrp *mrp);
-- 
cgit v1.2.3


From a7e13edf37beee65f2c2ec60c42e5fb89a2958ce Mon Sep 17 00:00:00 2001
From: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Date: Thu, 18 Nov 2021 18:12:04 +0800
Subject: net: dsa: felix: restrict psfp rules on ingress port

PSFP rules take effect on the streams from any port of VSC9959 switch.
This patch use ingress port to limit the rule only active on this port.

Each stream can only match two ingress source ports in VSC9959. Streams
from lowest port gets the configuration of SFID pointed by MAC Table
lookup and streams from highest port gets the configuration of (SFID+1)
pointed by MAC Table lookup. This patch defines the PSFP rule on highest
port as dummy rule, which means that it does not modify the MAC table.

Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 2a41685b5c7d..89d17629efe5 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -556,7 +556,8 @@ struct ocelot_ops {
 	u16 (*wm_dec)(u16 value);
 	void (*wm_stat)(u32 val, u32 *inuse, u32 *maxuse);
 	void (*psfp_init)(struct ocelot *ocelot);
-	int (*psfp_filter_add)(struct ocelot *ocelot, struct flow_cls_offload *f);
+	int (*psfp_filter_add)(struct ocelot *ocelot, int port,
+			       struct flow_cls_offload *f);
 	int (*psfp_filter_del)(struct ocelot *ocelot, struct flow_cls_offload *f);
 	int (*psfp_stats_get)(struct ocelot *ocelot, struct flow_cls_offload *f,
 			      struct flow_stats *stats);
-- 
cgit v1.2.3


From 6966df483d7b5b218aeb0e13e7e334a8fc3c1744 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 18 Nov 2021 13:49:51 +0200
Subject: regulator: Update protection IRQ helper docs

The documentation of IRQ notification helper had still references to
first RFC implementation which called BUG() while trying to protect the
hardware. Behaviour was improved as calling the BUG() was not a proper
solution. Current implementation attempts to call poweroff if handling
of potentially damaging error notification fails. Update the
documentation to reflect the actual behaviour.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/0c9cc4bcf20c3da66fd5a85c97ee4288e5727538.1637233864.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index bd7a73db2e66..54cf566616ae 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -499,7 +499,8 @@ struct regulator_irq_data {
  *		best to shut-down regulator(s) or reboot the SOC if error
  *		handling is repeatedly failing. If fatal_cnt is given the IRQ
  *		handling is aborted if it fails for fatal_cnt times and die()
- *		callback (if populated) or BUG() is called to try to prevent
+ *		callback (if populated) is called. If die() is not populated
+ *		poweroff for the system is attempted in order to prevent any
  *		further damage.
  * @reread_ms:	The time which is waited before attempting to re-read status
  *		at the worker if IC reading fails. Immediate re-read is done
@@ -516,11 +517,12 @@ struct regulator_irq_data {
  * @data:	Driver private data pointer which will be passed as such to
  *		the renable, map_event and die callbacks in regulator_irq_data.
  * @die:	Protection callback. If IC status reading or recovery actions
- *		fail fatal_cnt times this callback or BUG() is called. This
- *		callback should implement a final protection attempt like
- *		disabling the regulator. If protection succeeded this may
- *		return 0. If anything else is returned the core assumes final
- *		protection failed and calls BUG() as a last resort.
+ *		fail fatal_cnt times this callback is called or system is
+ *		powered off. This callback should implement a final protection
+ *		attempt like disabling the regulator. If protection succeeded
+ *		die() may return 0. If anything else is returned the core
+ *		assumes final protection failed and attempts to perform a
+ *		poweroff as a last resort.
  * @map_event:	Driver callback to map IRQ status into regulator devices with
  *		events / errors. NOTE: callback MUST initialize both the
  *		errors and notifs for all rdevs which it signals having
-- 
cgit v1.2.3


From e6feefa541f309afed8aa54431681261bc57bcde Mon Sep 17 00:00:00 2001
From: YC Hung <yc.hung@mediatek.com>
Date: Thu, 18 Nov 2021 12:07:43 +0200
Subject: ASoC: SOF: tokens: add token for Mediatek AFE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the definition for Mediatek audio front end(AFE) tokens,include
AFE sampling rate, channels, and format.

Signed-off-by: YC Hung <yc.hung@mediatek.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Reviewed-by: Guennadi Liakhovetski <guennadi.liakhovetski@linux.intel.com>
Reviewed-by: Daniel Baluta <daniel.baluta@nxp.com>
Signed-off-by: Daniel Baluta <daniel.baluta@nxp.com>
Link: https://lore.kernel.org/r/20211118100749.54628-3-daniel.baluta@oss.nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/uapi/sound/sof/tokens.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/uapi/sound/sof/tokens.h b/include/uapi/sound/sof/tokens.h
index 02b71a8deea4..b72fa385bebf 100644
--- a/include/uapi/sound/sof/tokens.h
+++ b/include/uapi/sound/sof/tokens.h
@@ -140,4 +140,9 @@
 #define SOF_TKN_INTEL_HDA_RATE			1500
 #define SOF_TKN_INTEL_HDA_CH			1501
 
+/* AFE */
+#define SOF_TKN_MEDIATEK_AFE_RATE		1600
+#define SOF_TKN_MEDIATEK_AFE_CH			1601
+#define SOF_TKN_MEDIATEK_AFE_FORMAT		1602
+
 #endif
-- 
cgit v1.2.3


From b72bfcffcfc11858a8fc92998733372606db485e Mon Sep 17 00:00:00 2001
From: YC Hung <yc.hung@mediatek.com>
Date: Thu, 18 Nov 2021 12:07:44 +0200
Subject: ASoC: SOF: topology: Add support for Mediatek AFE DAI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add new sof dai and config to pass topology file configuration
to SOF firmware running on Mediatek platform DSP core.
Add mediatek audio front end(AFE) to the list of supported sof_dais

Signed-off-by: YC Hung <yc.hung@mediatek.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Reviewed-by: Guennadi Liakhovetski <guennadi.liakhovetski@linux.intel.com>
Reviewed-by: Daniel Baluta <daniel.baluta@nxp.com>
Signed-off-by: Daniel Baluta <daniel.baluta@nxp.com>
Link: https://lore.kernel.org/r/20211118100749.54628-4-daniel.baluta@oss.nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sof/dai-mediatek.h | 23 +++++++++++++++++++++++
 include/sound/sof/dai.h          |  3 +++
 2 files changed, 26 insertions(+)
 create mode 100644 include/sound/sof/dai-mediatek.h

(limited to 'include')

diff --git a/include/sound/sof/dai-mediatek.h b/include/sound/sof/dai-mediatek.h
new file mode 100644
index 000000000000..62dd4720558d
--- /dev/null
+++ b/include/sound/sof/dai-mediatek.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2021 Mediatek Corporation. All rights reserved.
+ *
+ * Author: Bo Pan <bo.pan@mediatek.com>
+ */
+
+#ifndef __INCLUDE_SOUND_SOF_DAI_MEDIATEK_H__
+#define __INCLUDE_SOUND_SOF_DAI_MEDIATEK_H__
+
+#include <sound/sof/header.h>
+
+struct sof_ipc_dai_mtk_afe_params {
+	struct sof_ipc_hdr hdr;
+	u32 channels;
+	u32 rate;
+	u32 format;
+	u32 stream_id;
+	u32 reserved[4]; /* reserve for future */
+} __packed;
+
+#endif
+
diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h
index 3782127a7095..5132bc60f54b 100644
--- a/include/sound/sof/dai.h
+++ b/include/sound/sof/dai.h
@@ -13,6 +13,7 @@
 #include <sound/sof/dai-intel.h>
 #include <sound/sof/dai-imx.h>
 #include <sound/sof/dai-amd.h>
+#include <sound/sof/dai-mediatek.h>
 
 /*
  * DAI Configuration.
@@ -70,6 +71,7 @@ enum sof_ipc_dai_type {
 	SOF_DAI_AMD_BT,			/**< AMD ACP BT*/
 	SOF_DAI_AMD_SP,			/**< AMD ACP SP */
 	SOF_DAI_AMD_DMIC,		/**< AMD ACP DMIC */
+	SOF_DAI_MEDIATEK_AFE,		/**< Mediatek AFE */
 };
 
 /* general purpose DAI configuration */
@@ -97,6 +99,7 @@ struct sof_ipc_dai_config {
 		struct sof_ipc_dai_acp_params acpbt;
 		struct sof_ipc_dai_acp_params acpsp;
 		struct sof_ipc_dai_acp_params acpdmic;
+		struct sof_ipc_dai_mtk_afe_params afe;
 	};
 } __packed;
 
-- 
cgit v1.2.3


From 8b6e88555971eac384b89fb0bd6c72ee4e1e6a6a Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 18 Nov 2021 13:48:47 +0200
Subject: regulator: rohm-regulator: add helper for restricted voltage setting

Few ROHM PMICs have regulators where voltage setting can be done only
when regulator is disabled. Add helper for those PMICs.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/6f51871e9fea611d133b5dd2560f4a7ee1ede9cd.1637233864.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/mfd/rohm-generic.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h
index 35b392a0d73a..35c5866f48b7 100644
--- a/include/linux/mfd/rohm-generic.h
+++ b/include/linux/mfd/rohm-generic.h
@@ -80,6 +80,8 @@ int rohm_regulator_set_dvs_levels(const struct rohm_dvs_config *dvs,
 				  const struct regulator_desc *desc,
 				  struct regmap *regmap);
 
+int rohm_regulator_set_voltage_sel_restricted(struct regulator_dev *rdev,
+					      unsigned int sel);
 #else
 static inline int rohm_regulator_set_dvs_levels(const struct rohm_dvs_config *dvs,
 						struct device_node *np,
@@ -88,6 +90,11 @@ static inline int rohm_regulator_set_dvs_levels(const struct rohm_dvs_config *dv
 {
 	return 0;
 }
+static int rohm_regulator_set_voltage_sel_restricted(struct regulator_dev *rdev,
+						     unsigned int sel)
+{
+	return 0;
+}
 #endif
 
 #endif
-- 
cgit v1.2.3


From 92b1348277f8893671e5354adde64fe3cf462821 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 18 Nov 2021 13:49:30 +0200
Subject: regulator: Add units to limit documentation

The documentation for limits used at protection level setting
did not mention the units. Fix the units in documentation to
match values passed in from device-tree (uV, uA, Kelvin) to
avoid confusion.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/111114aca991e41e49a32f89b74e95285f07c1e3.1637233864.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index bd7a73db2e66..1cb8071fee34 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -101,11 +101,13 @@ enum regulator_detection_severity {
  *		is requested.
  * @set_over_voltage_protection: Support enabling of and setting limits for over
  *	voltage situation detection. Detection can be configured for same
- *	severities as over current protection.
+ *	severities as over current protection. Units of uV.
  * @set_under_voltage_protection: Support enabling of and setting limits for
- *	under situation detection.
+ *	under voltage situation detection. Detection can be configured for same
+ *	severities as over current protection. Units of uV.
  * @set_thermal_protection: Support enabling of and setting limits for over
- *	temperature situation detection.
+ *	temperature situation detection.Detection can be configured for same
+ *	severities as over current protection. Units of degree Kelvin.
  *
  * @set_active_discharge: Set active discharge enable/disable of regulators.
  *
-- 
cgit v1.2.3


From 418e0a3551bbef5b221705b0e5b8412cdc0afd39 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 5 Nov 2021 14:42:24 +0200
Subject: lib/string_helpers: Introduce kasprintf_strarray()

We have a few users already that basically want to have array of
sequential strings to be allocated and filled.

Provide a helper for them (basically adjusted version from gpio-mockup.c).

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/string_helpers.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index 4ba39e1403b2..f67a94013c87 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -100,6 +100,7 @@ char *kstrdup_quotable(const char *src, gfp_t gfp);
 char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp);
 char *kstrdup_quotable_file(struct file *file, gfp_t gfp);
 
+char **kasprintf_strarray(gfp_t gfp, const char *prefix, size_t n);
 void kfree_strarray(char **array, size_t n);
 
 #endif
-- 
cgit v1.2.3


From acdb89b6c87a2d7b5c48a82756e6f5c6f599f60a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 5 Nov 2021 14:42:25 +0200
Subject: lib/string_helpers: Introduce managed variant of kasprintf_strarray()

Some of the users want to have easy way to allocate array of strings
that will be automatically cleaned when associated device is gone.

Introduce managed variant of kasprintf_strarray() for such use cases.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 include/linux/string_helpers.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index f67a94013c87..7a22921c9db7 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -7,6 +7,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 
+struct device;
 struct file;
 struct task_struct;
 
@@ -103,4 +104,6 @@ char *kstrdup_quotable_file(struct file *file, gfp_t gfp);
 char **kasprintf_strarray(gfp_t gfp, const char *prefix, size_t n);
 void kfree_strarray(char **array, size_t n);
 
+char **devm_kasprintf_strarray(struct device *dev, const char *prefix, size_t n);
+
 #endif
-- 
cgit v1.2.3


From 57bdeef4716689d9b0e3571034d65cf420f6efcd Mon Sep 17 00:00:00 2001
From: Naveen Naidu <naveennaidu479@gmail.com>
Date: Thu, 18 Nov 2021 19:33:11 +0530
Subject: PCI: Add PCI_ERROR_RESPONSE and related definitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A config or MMIO read from a PCI device that doesn't exist or doesn't
respond causes a PCI error. There's no real data to return to satisfy the
CPU read, so most hardware fabricates ~0 data.

Add a PCI_ERROR_RESPONSE definition for that and use it where appropriate
to make these checks consistent and easier to find.

Also add helper definitions PCI_SET_ERROR_RESPONSE() and
PCI_POSSIBLE_ERROR() to make the code more readable.

Suggested-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/55563bf4dfc5d3fdc96695373c659d099bf175b1.1637243717.git.naveennaidu479@gmail.com
Signed-off-by: Naveen Naidu <naveennaidu479@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Pali Rohár <pali@kernel.org>
---
 include/linux/pci.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 18a75c8e615c..0ce26850470e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -154,6 +154,15 @@ enum pci_interrupt_pin {
 /* The number of legacy PCI INTx interrupts */
 #define PCI_NUM_INTX	4
 
+/*
+ * Reading from a device that doesn't respond typically returns ~0.  A
+ * successful read from a device may also return ~0, so you need additional
+ * information to reliably identify errors.
+ */
+#define PCI_ERROR_RESPONSE		(~0ULL)
+#define PCI_SET_ERROR_RESPONSE(val)	(*(val) = ((typeof(*(val))) PCI_ERROR_RESPONSE))
+#define PCI_POSSIBLE_ERROR(val)		((val) == ((typeof(val)) PCI_ERROR_RESPONSE))
+
 /*
  * pci_power_t values must match the bits in the Capabilities PME_Support
  * and Control/Status PowerState fields in the Power Management capability.
-- 
cgit v1.2.3


From c035713998700e8843c7d087f55bce3c54c0e3ec Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 5 Nov 2021 10:19:05 -0400
Subject: mm: Add functions to zero portions of a folio

These functions are wrappers around zero_user_segments(), which means
that zero_user_segments() can now be called for compound pages even when
CONFIG_TRANSPARENT_HUGEPAGE is disabled.

Use 'xend' as the name of the parameter to indicate that this is an
excluded end, not the more usual included end.  Excluding the end makes
more sense to the callers, but can cause confusion to readers who are
more used to seeing included ends.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 include/linux/highmem.h | 44 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 41 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index c944b3b70ee7..39bb9b47fa9c 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -230,10 +230,10 @@ static inline void tag_clear_highpage(struct page *page)
  * If we pass in a base or tail page, we can zero up to PAGE_SIZE.
  * If we pass in a head page, we can zero up to the size of the compound page.
  */
-#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#ifdef CONFIG_HIGHMEM
 void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
 		unsigned start2, unsigned end2);
-#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
+#else
 static inline void zero_user_segments(struct page *page,
 		unsigned start1, unsigned end1,
 		unsigned start2, unsigned end2)
@@ -253,7 +253,7 @@ static inline void zero_user_segments(struct page *page,
 	for (i = 0; i < compound_nr(page); i++)
 		flush_dcache_page(page + i);
 }
-#endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
+#endif
 
 static inline void zero_user_segment(struct page *page,
 	unsigned start, unsigned end)
@@ -363,4 +363,42 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len)
 	kunmap_local(addr);
 }
 
+/**
+ * folio_zero_segments() - Zero two byte ranges in a folio.
+ * @folio: The folio to write to.
+ * @start1: The first byte to zero.
+ * @xend1: One more than the last byte in the first range.
+ * @start2: The first byte to zero in the second range.
+ * @xend2: One more than the last byte in the second range.
+ */
+static inline void folio_zero_segments(struct folio *folio,
+		size_t start1, size_t xend1, size_t start2, size_t xend2)
+{
+	zero_user_segments(&folio->page, start1, xend1, start2, xend2);
+}
+
+/**
+ * folio_zero_segment() - Zero a byte range in a folio.
+ * @folio: The folio to write to.
+ * @start: The first byte to zero.
+ * @xend: One more than the last byte to zero.
+ */
+static inline void folio_zero_segment(struct folio *folio,
+		size_t start, size_t xend)
+{
+	zero_user_segments(&folio->page, start, xend, 0, 0);
+}
+
+/**
+ * folio_zero_range() - Zero a byte range in a folio.
+ * @folio: The folio to write to.
+ * @start: The first byte to zero.
+ * @length: The number of bytes to zero.
+ */
+static inline void folio_zero_range(struct folio *folio,
+		size_t start, size_t length)
+{
+	zero_user_segments(&folio->page, start, start + length, 0, 0);
+}
+
 #endif /* _LINUX_HIGHMEM_H */
-- 
cgit v1.2.3


From 2475fcfbe4e383d586c5a58711e436d83a2bdfe6 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Mon, 8 Nov 2021 21:44:41 +0800
Subject: dt-bindings: power: rpmpd: Add QCM2290 support

Add compatible and constants for the power domains exposed by the
QCM2290 RPM.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20211108134442.30051-3-shawn.guo@linaro.org
---
 include/dt-bindings/power/qcom-rpmpd.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/power/qcom-rpmpd.h b/include/dt-bindings/power/qcom-rpmpd.h
index 960f7976a807..340b0ffe5eb8 100644
--- a/include/dt-bindings/power/qcom-rpmpd.h
+++ b/include/dt-bindings/power/qcom-rpmpd.h
@@ -219,6 +219,16 @@
 #define SM6115_VDD_LPI_CX	6
 #define SM6115_VDD_LPI_MX	7
 
+/* QCM2290 Power Domains */
+#define QCM2290_VDDCX		0
+#define QCM2290_VDDCX_AO	1
+#define QCM2290_VDDCX_VFL	2
+#define QCM2290_VDDMX		3
+#define QCM2290_VDDMX_AO	4
+#define QCM2290_VDDMX_VFL	5
+#define QCM2290_VDD_LPI_CX	6
+#define QCM2290_VDD_LPI_MX	7
+
 /* RPM SMD Power Domain performance levels */
 #define RPM_SMD_LEVEL_RETENTION       16
 #define RPM_SMD_LEVEL_RETENTION_PLUS  32
-- 
cgit v1.2.3


From 0a84486d6c1da1c2738544d8fc1b07b1d3ce046f Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 16 Nov 2021 14:31:15 -0800
Subject: scsi: core: Remove Scsi_Host.shost_dev_attr_groups

Simplify the scsi_host_alloc() implementation by setting the shost_class
.dev_groups member instead of copying all host attribute group pointers
into the shost_dev_attr_groups[] array.

Link: https://lore.kernel.org/r/20211116223115.2103031-1-bvanassche@acm.org
Cc: Steffen Maier <maier@linux.ibm.com>
Cc: Damien Le Moal <damien.lemoal@wdc.com>
Suggested-by: Benjamin Block <bblock@linux.ibm.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/scsi/scsi_host.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index ebe059badba0..72e1a347baa6 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -691,12 +691,6 @@ struct Scsi_Host {
 
 	/* ldm bits */
 	struct device		shost_gendev, shost_dev;
-	/*
-	 * The array size 3 provides space for one attribute group defined by
-	 * the SCSI core, one attribute group defined by the SCSI LLD and one
-	 * terminating NULL pointer.
-	 */
-	const struct attribute_group *shost_dev_attr_groups[3];
 
 	/*
 	 * Points to the transport data (if any) which is allocated
-- 
cgit v1.2.3


From bc2dfc02836b1133d1bf4d22aa13d48ac98eabef Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 23 Oct 2021 11:10:50 +0200
Subject: cfg80211: implement APIs for dedicated radar detection HW

If a dedicated (off-channel) radar detection hardware (chain)
is available in the hardware/driver, allow this to be used by
calling the NL80211_CMD_RADAR_DETECT command with a new flag
attribute requesting off-channel radar detection is used.

Offchannel CAC (channel availability check) avoids the CAC
downtime when switching to a radar channel or when turning on
the AP.

Drivers advertise support for this using the new feature flag
NL80211_EXT_FEATURE_RADAR_OFFCHAN.

Tested-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/7468e291ef5d05d692c1738d25b8f778d8ea5c3f.1634979655.git.lorenzo@kernel.org
Link: https://lore.kernel.org/r/1e60e60fef00e14401adae81c3d49f3e5f307537.1634979655.git.lorenzo@kernel.org
Link: https://lore.kernel.org/r/85fa50f57fc3adb2934c8d9ca0be30394de6b7e8.1634979655.git.lorenzo@kernel.org
Link: https://lore.kernel.org/r/4b6c08671ad59aae0ac46fc94c02f31b1610eb72.1634979655.git.lorenzo@kernel.org
Link: https://lore.kernel.org/r/241849ccaf2c228873c6f8495bf87b19159ba458.1634979655.git.lorenzo@kernel.org
[remove offchan_mutex, fix cfg80211_stop_offchan_radar_detection(),
 remove gfp_t argument, fix documentation, fix tracing]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 25 +++++++++++++++++++++++++
 include/uapi/linux/nl80211.h | 13 +++++++++++++
 2 files changed, 38 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 423f97b982ff..db8866d42a4b 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4072,6 +4072,15 @@ struct mgmt_frame_regs {
  * @set_fils_aad: Set FILS AAD data to the AP driver so that the driver can use
  *	those to decrypt (Re)Association Request and encrypt (Re)Association
  *	Response frame.
+ *
+ * @set_radar_offchan: Configure dedicated offchannel chain available for
+ *	radar/CAC detection on some hw. This chain can't be used to transmit
+ *	or receive frames and it is bounded to a running wdev.
+ *	Offchannel radar/CAC detection allows to avoid the CAC downtime
+ *	switching to a different channel during CAC detection on the selected
+ *	radar channel.
+ *	The caller is expected to set chandef pointer to NULL in order to
+ *	disable offchannel CAC/radar detection.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -4404,6 +4413,8 @@ struct cfg80211_ops {
 				struct cfg80211_color_change_settings *params);
 	int     (*set_fils_aad)(struct wiphy *wiphy, struct net_device *dev,
 				struct cfg80211_fils_aad *fils_aad);
+	int	(*set_radar_offchan)(struct wiphy *wiphy,
+				     struct cfg80211_chan_def *chandef);
 };
 
 /*
@@ -7633,6 +7644,20 @@ void cfg80211_cac_event(struct net_device *netdev,
 			const struct cfg80211_chan_def *chandef,
 			enum nl80211_radar_event event, gfp_t gfp);
 
+/**
+ * cfg80211_offchan_cac_event - Channel Availability Check (CAC) offchan event
+ * @wiphy: the wiphy
+ * @chandef: chandef for the current channel
+ * @event: type of event
+ *
+ * This function is called when a Channel Availability Check (CAC) is finished,
+ * started or aborted by a offchannel dedicated chain.
+ *
+ * Note that this acquires the wiphy lock.
+ */
+void cfg80211_offchan_cac_event(struct wiphy *wiphy,
+				const struct cfg80211_chan_def *chandef,
+				enum nl80211_radar_event event);
 
 /**
  * cfg80211_gtk_rekey_notify - notify userspace about driver rekeying
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 61cab81e920d..3e734826792f 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2639,6 +2639,13 @@ enum nl80211_commands {
  *	Mandatory parameter for the transmitting interface to enable MBSSID.
  *	Optional for the non-transmitting interfaces.
  *
+ * @NL80211_ATTR_RADAR_OFFCHAN: Configure dedicated offchannel chain available for
+ *	radar/CAC detection on some hw. This chain can't be used to transmit
+ *	or receive frames and it is bounded to a running wdev.
+ *	Offchannel radar/CAC detection allows to avoid the CAC downtime
+ *	switching on a different channel during CAC detection on the selected
+ *	radar channel.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3145,6 +3152,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_MBSSID_CONFIG,
 	NL80211_ATTR_MBSSID_ELEMS,
 
+	NL80211_ATTR_RADAR_OFFCHAN,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -6051,6 +6060,9 @@ enum nl80211_feature_flags {
  *	frames. Userspace has to share FILS AAD details to the driver by using
  *	@NL80211_CMD_SET_FILS_AAD.
  *
+ * @NL80211_EXT_FEATURE_RADAR_OFFCHAN: Device supports offchannel radar/CAC
+ *	detection.
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -6117,6 +6129,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE,
 	NL80211_EXT_FEATURE_BSS_COLOR,
 	NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD,
+	NL80211_EXT_FEATURE_RADAR_OFFCHAN,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
-- 
cgit v1.2.3


From 237337c230b94e78a5a0f88d1705259ab543fc40 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 23 Oct 2021 11:10:51 +0200
Subject: mac80211: introduce set_radar_offchan callback

Similar to cfg80211, introduce set_radar_offchan callback in mac80211_ops
in order to configure a dedicated offchannel chain available on some hw
(e.g. mt7915) to perform offchannel CAC detection and avoid tx/rx downtime.

Tested-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/201110606d4f3a7dfdf31440e351f2e2c375d4f0.1634979655.git.lorenzo@kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index dd757f0987b0..775dbb982654 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3944,6 +3944,14 @@ struct ieee80211_prep_tx_info {
  *	twt structure.
  * @twt_teardown_request: Update the hw with TWT teardown request received
  *	from the peer.
+ * @set_radar_offchan: Configure dedicated offchannel chain available for
+ *	radar/CAC detection on some hw. This chain can't be used to transmit
+ *	or receive frames and it is bounded to a running wdev.
+ *	Offchannel radar/CAC detection allows to avoid the CAC downtime
+ *	switching to a different channel during CAC detection on the selected
+ *	radar channel.
+ *	The caller is expected to set chandef pointer to NULL in order to
+ *	disable offchannel CAC/radar detection.
  */
 struct ieee80211_ops {
 	void (*tx)(struct ieee80211_hw *hw,
@@ -4272,6 +4280,8 @@ struct ieee80211_ops {
 			      struct ieee80211_twt_setup *twt);
 	void (*twt_teardown_request)(struct ieee80211_hw *hw,
 				     struct ieee80211_sta *sta, u8 flowid);
+	int (*set_radar_offchan)(struct ieee80211_hw *hw,
+				 struct cfg80211_chan_def *chandef);
 };
 
 /**
-- 
cgit v1.2.3


From 1507b153198137dfa9cb4bec7c5dee07089ec3af Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 27 Oct 2021 11:03:42 +0200
Subject: cfg80211: move offchan_cac_event to a dedicated work

In order to make cfg80211_offchan_cac_abort() (renamed from
cfg80211_offchan_cac_event) callable in other contexts and
without so much locking restrictions, make it trigger a new
work instead of operating directly.

Do some other renames while at it to clarify.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/6145c3d0f30400a568023f67981981d24c7c6133.1635325205.git.lorenzo@kernel.org
[rewrite commit log]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index db8866d42a4b..362da9f6bf39 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7645,19 +7645,13 @@ void cfg80211_cac_event(struct net_device *netdev,
 			enum nl80211_radar_event event, gfp_t gfp);
 
 /**
- * cfg80211_offchan_cac_event - Channel Availability Check (CAC) offchan event
+ * cfg80211_offchan_cac_abort - Channel Availability Check offchan abort event
  * @wiphy: the wiphy
- * @chandef: chandef for the current channel
- * @event: type of event
- *
- * This function is called when a Channel Availability Check (CAC) is finished,
- * started or aborted by a offchannel dedicated chain.
  *
- * Note that this acquires the wiphy lock.
+ * This function is called by the driver when a Channel Availability Check
+ * (CAC) is aborted by a offchannel dedicated chain.
  */
-void cfg80211_offchan_cac_event(struct wiphy *wiphy,
-				const struct cfg80211_chan_def *chandef,
-				enum nl80211_radar_event event);
+void cfg80211_offchan_cac_abort(struct wiphy *wiphy);
 
 /**
  * cfg80211_gtk_rekey_notify - notify userspace about driver rekeying
-- 
cgit v1.2.3


From c4f5b30dda01f2f6979a9681142de454991182ee Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Fri, 12 Nov 2021 18:44:10 +0000
Subject: reset: Add of_reset_control_get_optional_exclusive()

Add optional variant of of_reset_control_get_exclusive(). If the
requested reset is not specified in the device tree, this function
returns NULL instead of an error.

Suggested-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20211112184413.4391-2-biju.das.jz@bp.renesas.com
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/linux/reset.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/reset.h b/include/linux/reset.h
index db0e6115a2f6..8a21b5756c3e 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -454,6 +454,26 @@ static inline struct reset_control *of_reset_control_get_exclusive(
 	return __of_reset_control_get(node, id, 0, false, false, true);
 }
 
+/**
+ * of_reset_control_get_optional_exclusive - Lookup and obtain an optional exclusive
+ *                                           reference to a reset controller.
+ * @node: device to be reset by the controller
+ * @id: reset line name
+ *
+ * Optional variant of of_reset_control_get_exclusive(). If the requested reset
+ * is not specified in the device tree, this function returns NULL instead of
+ * an error.
+ *
+ * Returns a struct reset_control or IS_ERR() condition containing errno.
+ *
+ * Use of id names is optional.
+ */
+static inline struct reset_control *of_reset_control_get_optional_exclusive(
+				struct device_node *node, const char *id)
+{
+	return __of_reset_control_get(node, id, 0, false, true, true);
+}
+
 /**
  * of_reset_control_get_shared - Lookup and obtain a shared reference
  *                               to a reset controller.
-- 
cgit v1.2.3


From b5d8cf0af167f3ab9f4cfe44918cde01e20a1222 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 18 Nov 2021 12:34:07 -0800
Subject: net/af_iucv: Use struct_group() to zero struct iucv_sock region

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memset(), avoid intentionally writing across
neighboring fields.

Add struct_group() to mark the region of struct iucv_sock that gets
initialized to zero. Avoid the future warning:

In function 'fortify_memset_chk',
    inlined from 'iucv_sock_alloc' at net/iucv/af_iucv.c:476:2:
./include/linux/fortify-string.h:199:4: warning: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning]
  199 |    __write_overflow_field(p_size_field, size);
      |    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Acked-by: Karsten Graul <kgraul@linux.ibm.com>
Link: https://lore.kernel.org/lkml/19ff61a0-0cda-6000-ce56-dc6b367c00d6@linux.ibm.com/
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/iucv/af_iucv.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h
index ff06246dbbb9..df85d19fbf84 100644
--- a/include/net/iucv/af_iucv.h
+++ b/include/net/iucv/af_iucv.h
@@ -112,10 +112,12 @@ enum iucv_tx_notify {
 
 struct iucv_sock {
 	struct sock		sk;
-	char			src_user_id[8];
-	char			src_name[8];
-	char			dst_user_id[8];
-	char			dst_name[8];
+	struct_group(init,
+		char		src_user_id[8];
+		char		src_name[8];
+		char		dst_user_id[8];
+		char		dst_name[8];
+	);
 	struct list_head	accept_q;
 	spinlock_t		accept_q_lock;
 	struct sock		*parent;
-- 
cgit v1.2.3


From fcb116bc43c8c37c052530ead79872f8b2615711 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 18 Nov 2021 14:23:21 -0600
Subject: signal: Replace force_fatal_sig with force_exit_sig when in doubt

Recently to prevent issues with SECCOMP_RET_KILL and similar signals
being changed before they are delivered SA_IMMUTABLE was added.

Unfortunately this broke debuggers[1][2] which reasonably expect
to be able to trap synchronous SIGTRAP and SIGSEGV even when
the target process is not configured to handle those signals.

Add force_exit_sig and use it instead of force_fatal_sig where
historically the code has directly called do_exit.  This has the
implementation benefits of going through the signal exit path
(including generating core dumps) without the danger of allowing
userspace to ignore or change these signals.

This avoids userspace regressions as older kernels exited with do_exit
which debuggers also can not intercept.

In the future is should be possible to improve the quality of
implementation of the kernel by changing some of these force_exit_sig
calls to force_fatal_sig.  That can be done where it matters on
a case-by-case basis with careful analysis.

Reported-by: Kyle Huey <me@kylehuey.com>
Reported-by: kernel test robot <oliver.sang@intel.com>
[1] https://lkml.kernel.org/r/CAP045AoMY4xf8aC_4QU_-j7obuEPYgTcnQQP3Yxk=2X90jtpjw@mail.gmail.com
[2] https://lkml.kernel.org/r/20211117150258.GB5403@xsang-OptiPlex-9020
Fixes: 00b06da29cf9 ("signal: Add SA_IMMUTABLE to ensure forced siganls do not get changed")
Fixes: a3616a3c0272 ("signal/m68k: Use force_sigsegv(SIGSEGV) in fpsp040_die")
Fixes: 83a1f27ad773 ("signal/powerpc: On swapcontext failure force SIGSEGV")
Fixes: 9bc508cf0791 ("signal/s390: Use force_sigsegv in default_trap_handler")
Fixes: 086ec444f866 ("signal/sparc32: In setup_rt_frame and setup_fram use force_fatal_sig")
Fixes: c317d306d550 ("signal/sparc32: Exit with a fatal signal when try_to_clear_window_buffer fails")
Fixes: 695dd0d634df ("signal/x86: In emulate_vsyscall force a signal instead of calling do_exit")
Fixes: 1fbd60df8a85 ("signal/vm86_32: Properly send SIGSEGV when the vm86 state cannot be saved.")
Fixes: 941edc5bf174 ("exit/syscall_user_dispatch: Send ordinary signals on failure")
Link: https://lkml.kernel.org/r/871r3dqfv8.fsf_-_@email.froward.int.ebiederm.org
Reviewed-by: Kees Cook <keescook@chromium.org>
Tested-by: Kees Cook <keescook@chromium.org>
Tested-by: Kyle Huey <khuey@kylehuey.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 23505394ef70..33a50642cf41 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -352,6 +352,7 @@ extern __must_check bool do_notify_parent(struct task_struct *, int);
 extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
 extern void force_sig(int);
 extern void force_fatal_sig(int);
+extern void force_exit_sig(int);
 extern int send_sig(int, struct task_struct *, int);
 extern int zap_other_threads(struct task_struct *p);
 extern struct sigqueue *sigqueue_alloc(void);
-- 
cgit v1.2.3


From 98a1ca29768aca6e06e93f2bc4439f01ba439ac4 Mon Sep 17 00:00:00 2001
From: Lukas Middendorf <kernel@tuxforce.de>
Date: Sun, 18 Apr 2021 01:12:03 +0100
Subject: media: media dvb_frontend: add suspend and resume callbacks to
 dvb_frontend_ops

While dvb_tuner_ops already has dedicated suspend and resume callbacks,
dvb_frontend_ops currently does not have them. Add those callbacks and
use them for suspend and resume. If they are not set, the old behavior
of calling sleep or init is used.

This allows dvb_frontend drivers to handle resume differently from init,
and suspend differently from sleep. No change is required for drivers
not needing this functionality.

Link: https://lore.kernel.org/linux-media/20210418001204.7453-2-kernel@tuxforce.de

Cc: Lukas Middendorf <kernel@tuxforce.de>, Antti Palosaari <crope@iki.fi>, Mauro Carvalho Chehab <mchehab@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Lukas Middendorf <kernel@tuxforce.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/media/dvb_frontend.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/media/dvb_frontend.h b/include/media/dvb_frontend.h
index 0d76fa4551b3..e7c44870f20d 100644
--- a/include/media/dvb_frontend.h
+++ b/include/media/dvb_frontend.h
@@ -364,6 +364,10 @@ struct dvb_frontend_internal_info {
  *			allocated by the driver.
  * @init:		callback function used to initialize the tuner device.
  * @sleep:		callback function used to put the tuner to sleep.
+ * @suspend:		callback function used to inform that the Kernel will
+ *			suspend.
+ * @resume:		callback function used to inform that the Kernel is
+ *			resuming from suspend.
  * @write:		callback function used by some demod legacy drivers to
  *			allow other drivers to write data into their registers.
  *			Should not be used on new drivers.
@@ -443,6 +447,8 @@ struct dvb_frontend_ops {
 
 	int (*init)(struct dvb_frontend* fe);
 	int (*sleep)(struct dvb_frontend* fe);
+	int (*suspend)(struct dvb_frontend *fe);
+	int (*resume)(struct dvb_frontend *fe);
 
 	int (*write)(struct dvb_frontend* fe, const u8 buf[], int len);
 
@@ -755,7 +761,8 @@ void dvb_frontend_detach(struct dvb_frontend *fe);
  * &dvb_frontend_ops.tuner_ops.suspend\(\) is available, it calls it. Otherwise,
  * it will call &dvb_frontend_ops.tuner_ops.sleep\(\), if available.
  *
- * It will also call &dvb_frontend_ops.sleep\(\) to put the demod to suspend.
+ * It will also call &dvb_frontend_ops.suspend\(\) to put the demod to suspend,
+ * if available. Otherwise it will call &dvb_frontend_ops.sleep\(\).
  *
  * The drivers should also call dvb_frontend_suspend\(\) as part of their
  * handler for the &device_driver.suspend\(\).
@@ -769,7 +776,9 @@ int dvb_frontend_suspend(struct dvb_frontend *fe);
  *
  * This function resumes the usual operation of the tuner after resume.
  *
- * In order to resume the frontend, it calls the demod &dvb_frontend_ops.init\(\).
+ * In order to resume the frontend, it calls the demod
+ * &dvb_frontend_ops.resume\(\) if available. Otherwise it calls demod
+ * &dvb_frontend_ops.init\(\).
  *
  * If &dvb_frontend_ops.tuner_ops.resume\(\) is available, It, it calls it.
  * Otherwise,t will call &dvb_frontend_ops.tuner_ops.init\(\), if available.
-- 
cgit v1.2.3


From d68f50e6ad0ee7080b0244a15f2dd3d46040632a Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Mon, 18 Oct 2021 14:54:55 +0200
Subject: dt-bindings: clock: samsung: add IDs for some core clocks

Add IDs for some core clocks referenced during the boot process.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20211018125456.8292-1-m.szyprowski@samsung.com
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
---
 include/dt-bindings/clock/exynos4.h    | 4 +++-
 include/dt-bindings/clock/exynos5250.h | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/exynos4.h b/include/dt-bindings/clock/exynos4.h
index 88ec3968b90a..acbfbab875ec 100644
--- a/include/dt-bindings/clock/exynos4.h
+++ b/include/dt-bindings/clock/exynos4.h
@@ -209,6 +209,7 @@
 #define CLK_ACLK400_MCUISP	395 /* Exynos4x12 only */
 #define CLK_MOUT_HDMI		396
 #define CLK_MOUT_MIXER		397
+#define CLK_MOUT_VPLLSRC	398
 
 /* gate clocks - ppmu */
 #define CLK_PPMULEFT		400
@@ -236,9 +237,10 @@
 #define CLK_DIV_C2C		458 /* Exynos4x12 only */
 #define CLK_DIV_GDL		459
 #define CLK_DIV_GDR		460
+#define CLK_DIV_CORE2		461
 
 /* must be greater than maximal clock id */
-#define CLK_NR_CLKS		461
+#define CLK_NR_CLKS		462
 
 /* Exynos4x12 ISP clocks */
 #define CLK_ISP_FIMC_ISP		 1
diff --git a/include/dt-bindings/clock/exynos5250.h b/include/dt-bindings/clock/exynos5250.h
index e259cc01f22f..4680da7357d3 100644
--- a/include/dt-bindings/clock/exynos5250.h
+++ b/include/dt-bindings/clock/exynos5250.h
@@ -19,6 +19,7 @@
 #define CLK_FOUT_EPLL		7
 #define CLK_FOUT_VPLL		8
 #define CLK_ARM_CLK		9
+#define CLK_DIV_ARM2		10
 
 /* gate for special clocks (sclk) */
 #define CLK_SCLK_CAM_BAYER	128
@@ -174,8 +175,9 @@
 #define CLK_MOUT_ACLK300_DISP1_SUB	1027
 #define CLK_MOUT_APLL		1028
 #define CLK_MOUT_MPLL		1029
+#define CLK_MOUT_VPLLSRC	1030
 
 /* must be greater than maximal clock id */
-#define CLK_NR_CLKS		1030
+#define CLK_NR_CLKS		1031
 
 #endif /* _DT_BINDINGS_CLOCK_EXYNOS_5250_H */
-- 
cgit v1.2.3


From d8466f73010faf71effb21228ae1cbf577dab130 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Sat, 16 Oct 2021 14:22:27 +0100
Subject: mtd: rawnand: Export nand_read_page_hwecc_oob_first()

Move the function nand_read_page_hwecc_oob_first() (previously
nand_davinci_read_page_hwecc_oob_first()) to nand_base.c, and export it
as a GPL symbol, so that it can be used by more modules.

Cc: <stable@vger.kernel.org> # v5.2
Fixes: a0ac778eb82c ("mtd: rawnand: ingenic: Add support for the JZ4740")
Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20211016132228.40254-4-paul@crapouillou.net
---
 include/linux/mtd/rawnand.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index b2f9dd3cbd69..5b88cd51fadb 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1539,6 +1539,8 @@ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len,
 		      bool force_8bit, bool check_only);
 int nand_write_data_op(struct nand_chip *chip, const void *buf,
 		       unsigned int len, bool force_8bit);
+int nand_read_page_hwecc_oob_first(struct nand_chip *chip, uint8_t *buf,
+				   int oob_required, int page);
 
 /* Scan and identify a NAND device */
 int nand_scan_with_ids(struct nand_chip *chip, unsigned int max_chips,
-- 
cgit v1.2.3


From 8d22679dc89a6d9e1d41b2514902e3f7ef51547a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 18 Nov 2021 18:23:55 -0800
Subject: ipv6: ip6_skb_dst_mtu() cleanups

Use const attribute where we can, and cache skb_dst()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20211119022355.2985984-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip6_route.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 5efd0b71dc67..ca2d6b60e1ec 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -263,19 +263,19 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst,
 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		 int (*output)(struct net *, struct sock *, struct sk_buff *));
 
-static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb)
+static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
 {
-	unsigned int mtu;
-
-	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
+	const struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
 				inet6_sk(skb->sk) : NULL;
+	const struct dst_entry *dst = skb_dst(skb);
+	unsigned int mtu;
 
 	if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) {
-		mtu = READ_ONCE(skb_dst(skb)->dev->mtu);
-		mtu -= lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu);
-	} else
-		mtu = dst_mtu(skb_dst(skb));
-
+		mtu = READ_ONCE(dst->dev->mtu);
+		mtu -= lwtunnel_headroom(dst->lwtstate, mtu);
+	} else {
+		mtu = dst_mtu(dst);
+	}
 	return mtu;
 }
 
-- 
cgit v1.2.3


From adeef3e32146a8d2a73c399dc6f5d76a449131b1 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 19 Nov 2021 06:21:51 -0800
Subject: net: constify netdev->dev_addr

Commit 406f42fa0d3c ("net-next: When a bond have a massive amount
of VLANs...") introduced a rbtree for faster Ethernet address look
up. We converted all users to make modifications via appropriate
helpers, make netdev->dev_addr const.

The update helpers need to upcast from the buffer to
struct netdev_hw_addr.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4f4a299e92de..2462195784a9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2117,7 +2117,7 @@ struct net_device {
  * Cache lines mostly used on receive path (including eth_type_trans())
  */
 	/* Interface address info used in eth_type_trans() */
-	unsigned char		*dev_addr;
+	const unsigned char	*dev_addr;
 
 	struct netdev_rx_queue	*_rx;
 	unsigned int		num_rx_queues;
@@ -4268,10 +4268,13 @@ void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list,
 void __hw_addr_init(struct netdev_hw_addr_list *list);
 
 /* Functions used for device addresses handling */
+void dev_addr_mod(struct net_device *dev, unsigned int offset,
+		  const void *addr, size_t len);
+
 static inline void
 __dev_addr_set(struct net_device *dev, const void *addr, size_t len)
 {
-	memcpy(dev->dev_addr, addr, len);
+	dev_addr_mod(dev, 0, addr, len);
 }
 
 static inline void dev_addr_set(struct net_device *dev, const u8 *addr)
@@ -4279,13 +4282,6 @@ static inline void dev_addr_set(struct net_device *dev, const u8 *addr)
 	__dev_addr_set(dev, addr, dev->addr_len);
 }
 
-static inline void
-dev_addr_mod(struct net_device *dev, unsigned int offset,
-	     const void *addr, size_t len)
-{
-	memcpy(&dev->dev_addr[offset], addr, len);
-}
-
 int dev_addr_add(struct net_device *dev, const unsigned char *addr,
 		 unsigned char addr_type);
 int dev_addr_del(struct net_device *dev, const unsigned char *addr,
-- 
cgit v1.2.3


From d07b26f5bbea9ade34dfd6abea7b3ca056c03cd1 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 19 Nov 2021 06:21:53 -0800
Subject: dev_addr: add a modification check

netdev->dev_addr should only be modified via helpers,
but someone may be casting off the const. Add a runtime
check to catch abuses.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2462195784a9..cb7f2661d187 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1942,6 +1942,8 @@ enum netdev_ml_priv_type {
  *	@unlink_list:	As netif_addr_lock() can be called recursively,
  *			keep a list of interfaces to be deleted.
  *
+ *	@dev_addr_shadow:	Copy of @dev_addr to catch direct writes.
+ *
  *	FIXME: cleanup struct net_device such that network protocol info
  *	moves out.
  */
@@ -2268,6 +2270,8 @@ struct net_device {
 
 	/* protected by rtnl_lock */
 	struct bpf_xdp_entity	xdp_state[__MAX_XDP_MODE];
+
+	u8 dev_addr_shadow[MAX_ADDR_LEN];
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
@@ -4288,6 +4292,7 @@ int dev_addr_del(struct net_device *dev, const unsigned char *addr,
 		 unsigned char addr_type);
 void dev_addr_flush(struct net_device *dev);
 int dev_addr_init(struct net_device *dev);
+void dev_addr_check(struct net_device *dev);
 
 /* Functions used for unicast addresses handling */
 int dev_uc_add(struct net_device *dev, const unsigned char *addr);
-- 
cgit v1.2.3


From 4f47d5d507d6f211ebceac76a5f0b83c2eae154b Mon Sep 17 00:00:00 2001
From: Poorva Sonparote <psonparo@redhat.com>
Date: Fri, 19 Nov 2021 12:41:34 -0800
Subject: ipv4: Exposing __ip_sock_set_tos() in ip.h

Making the static function __ip_sock_set_tos() from net/ipv4/ip_sockglue.c
accessible by declaring it in include/net/ip.h
The reason for doing this is to use this function to set IP_TOS value in
mptcp_setsockopt() without the lock.

Signed-off-by: Poorva Sonparote <psonparo@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index 7d1088888c10..81e23a102a0d 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -783,5 +783,6 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val);
 void ip_sock_set_pktinfo(struct sock *sk);
 void ip_sock_set_recverr(struct sock *sk);
 void ip_sock_set_tos(struct sock *sk, int val);
+void  __ip_sock_set_tos(struct sock *sk, int val);
 
 #endif	/* _IP_H */
-- 
cgit v1.2.3


From 85b6d24646e4125c591639841169baa98a2da503 Mon Sep 17 00:00:00 2001
From: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
Date: Fri, 19 Nov 2021 16:43:21 -0800
Subject: shm: extend forced shm destroy to support objects from several IPC
 nses

Currently, the exit_shm() function not designed to work properly when
task->sysvshm.shm_clist holds shm objects from different IPC namespaces.

This is a real pain when sysctl kernel.shm_rmid_forced = 1, because it
leads to use-after-free (reproducer exists).

This is an attempt to fix the problem by extending exit_shm mechanism to
handle shm's destroy from several IPC ns'es.

To achieve that we do several things:

1. add a namespace (non-refcounted) pointer to the struct shmid_kernel

2. during new shm object creation (newseg()/shmget syscall) we
   initialize this pointer by current task IPC ns

3. exit_shm() fully reworked such that it traverses over all shp's in
   task->sysvshm.shm_clist and gets IPC namespace not from current task
   as it was before but from shp's object itself, then call
   shm_destroy(shp, ns).

Note: We need to be really careful here, because as it was said before
(1), our pointer to IPC ns non-refcnt'ed.  To be on the safe side we
using special helper get_ipc_ns_not_zero() which allows to get IPC ns
refcounter only if IPC ns not in the "state of destruction".

Q/A

Q: Why can we access shp->ns memory using non-refcounted pointer?
A: Because shp object lifetime is always shorther than IPC namespace
   lifetime, so, if we get shp object from the task->sysvshm.shm_clist
   while holding task_lock(task) nobody can steal our namespace.

Q: Does this patch change semantics of unshare/setns/clone syscalls?
A: No. It's just fixes non-covered case when process may leave IPC
   namespace without getting task->sysvshm.shm_clist list cleaned up.

Link: https://lkml.kernel.org/r/67bb03e5-f79c-1815-e2bf-949c67047418@colorfullife.com
Link: https://lkml.kernel.org/r/20211109151501.4921-1-manfred@colorfullife.com
Fixes: ab602f79915 ("shm: make exit_shm work proportional to task activity")
Co-developed-by: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Andrei Vagin <avagin@gmail.com>
Cc: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Cc: Vasily Averin <vvs@virtuozzo.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h | 15 +++++++++++++++
 include/linux/sched/task.h    |  2 +-
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 05e22770af51..b75395ec8d52 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -131,6 +131,16 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
 	return ns;
 }
 
+static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
+{
+	if (ns) {
+		if (refcount_inc_not_zero(&ns->ns.count))
+			return ns;
+	}
+
+	return NULL;
+}
+
 extern void put_ipc_ns(struct ipc_namespace *ns);
 #else
 static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
@@ -147,6 +157,11 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
 	return ns;
 }
 
+static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
+{
+	return ns;
+}
+
 static inline void put_ipc_ns(struct ipc_namespace *ns)
 {
 }
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index ba88a6987400..058d7f371e25 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -158,7 +158,7 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
  * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
  * subscriptions and synchronises with wait4().  Also used in procfs.  Also
  * pins the final release of task.io_context.  Also protects ->cpuset and
- * ->cgroup.subsys[]. And ->vfork_done.
+ * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist.
  *
  * Nests both inside and outside of read_lock(&tasklist_lock).
  * It must not be nested with write_lock_irq(&tasklist_lock),
-- 
cgit v1.2.3


From afe041c2d0febd83698b8b0164e6b3b1dfae0b66 Mon Sep 17 00:00:00 2001
From: Bui Quang Minh <minhquangbui99@gmail.com>
Date: Fri, 19 Nov 2021 16:43:40 -0800
Subject: hugetlb: fix hugetlb cgroup refcounting during mremap

When hugetlb_vm_op_open() is called during copy_vma(), we may take the
reference to resv_map->css.  Later, when clearing the reservation
pointer of old_vma after transferring it to new_vma, we forget to drop
the reference to resv_map->css.  This leads to a reference leak of css.

Fixes this by adding a check to drop reservation css reference in
clear_vma_resv_huge_pages()

Link: https://lkml.kernel.org/r/20211113154412.91134-1-minhquangbui99@gmail.com
Fixes: 550a7d60bd5e35 ("mm, hugepages: add mremap() support for hugepage backed vma")
Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Mina Almasry <almasrymina@google.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb_cgroup.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index c137396129db..ba025ae27882 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -128,6 +128,13 @@ static inline void resv_map_dup_hugetlb_cgroup_uncharge_info(
 		css_get(resv_map->css);
 }
 
+static inline void resv_map_put_hugetlb_cgroup_uncharge_info(
+						struct resv_map *resv_map)
+{
+	if (resv_map->css)
+		css_put(resv_map->css);
+}
+
 extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 					struct hugetlb_cgroup **ptr);
 extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
@@ -211,6 +218,11 @@ static inline void resv_map_dup_hugetlb_cgroup_uncharge_info(
 {
 }
 
+static inline void resv_map_put_hugetlb_cgroup_uncharge_info(
+						struct resv_map *resv_map)
+{
+}
+
 static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 					       struct hugetlb_cgroup **ptr)
 {
-- 
cgit v1.2.3


From f65b8132092699e4f672111836f3f51c00c354f2 Mon Sep 17 00:00:00 2001
From: Elyes HAOUAS <ehaouas@noos.fr>
Date: Thu, 28 Oct 2021 23:05:17 +0200
Subject: include/linux/efi.h: Remove unneeded whitespaces before tabs

Signed-off-by: Elyes HAOUAS <ehaouas@noos.fr>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/linux/efi.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index dbd39b20e034..de36fb547602 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -570,8 +570,8 @@ extern struct efi {
 	unsigned long			flags;
 } efi;
 
-#define EFI_RT_SUPPORTED_GET_TIME 				0x0001
-#define EFI_RT_SUPPORTED_SET_TIME 				0x0002
+#define EFI_RT_SUPPORTED_GET_TIME				0x0001
+#define EFI_RT_SUPPORTED_SET_TIME				0x0002
 #define EFI_RT_SUPPORTED_GET_WAKEUP_TIME			0x0004
 #define EFI_RT_SUPPORTED_SET_WAKEUP_TIME			0x0008
 #define EFI_RT_SUPPORTED_GET_VARIABLE				0x0010
@@ -838,7 +838,7 @@ extern int efi_status_to_err(efi_status_t status);
 #define EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS 0x0000000000000020
 #define EFI_VARIABLE_APPEND_WRITE	0x0000000000000040
 
-#define EFI_VARIABLE_MASK 	(EFI_VARIABLE_NON_VOLATILE | \
+#define EFI_VARIABLE_MASK	(EFI_VARIABLE_NON_VOLATILE | \
 				EFI_VARIABLE_BOOTSERVICE_ACCESS | \
 				EFI_VARIABLE_RUNTIME_ACCESS | \
 				EFI_VARIABLE_HARDWARE_ERROR_RECORD | \
-- 
cgit v1.2.3


From 3218910fd5858842a1dd98ce92b602f0878f8210 Mon Sep 17 00:00:00 2001
From: Adrian Larumbe <adrianml@alumnos.upm.es>
Date: Mon, 1 Nov 2021 18:08:24 +0000
Subject: dmaengine: Add core function and capability check for DMA_MEMCPY_SG

This is the old DMA_SG interface that was removed in commit
c678fa66341c ("dmaengine: remove DMA_SG as it is dead code in kernel"). It
has been renamed to DMA_MEMCPY_SG to better match the MEMSET and MEMSET_SG
naming convention.

It should only be used for mem2mem copies, either main system memory or
CPU-addressable device memory (like video memory on a PCI graphics card).

Bringing back this interface was prompted by the need to use the Xilinx
CDMA device for mem2mem SG transfers.

Signed-off-by: Adrian Larumbe <adrianml@alumnos.upm.es>
Link: https://lore.kernel.org/r/20211101180825.241048-3-adrianml@alumnos.upm.es
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/dmaengine.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 9000f3ffce8b..554a86665de9 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -50,6 +50,7 @@ enum dma_status {
  */
 enum dma_transaction_type {
 	DMA_MEMCPY,
+	DMA_MEMCPY_SG,
 	DMA_XOR,
 	DMA_PQ,
 	DMA_XOR_VAL,
@@ -891,6 +892,11 @@ struct dma_device {
 	struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)(
 		struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
 		size_t len, unsigned long flags);
+	struct dma_async_tx_descriptor *(*device_prep_dma_memcpy_sg)(
+		struct dma_chan *chan,
+		struct scatterlist *dst_sg, unsigned int dst_nents,
+		struct scatterlist *src_sg, unsigned int src_nents,
+		unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
 		struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
 		unsigned int src_cnt, size_t len, unsigned long flags);
@@ -1051,6 +1057,20 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy(
 						    len, flags);
 }
 
+static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy_sg(
+		struct dma_chan *chan,
+		struct scatterlist *dst_sg, unsigned int dst_nents,
+		struct scatterlist *src_sg, unsigned int src_nents,
+		unsigned long flags)
+{
+	if (!chan || !chan->device || !chan->device->device_prep_dma_memcpy_sg)
+		return NULL;
+
+	return chan->device->device_prep_dma_memcpy_sg(chan, dst_sg, dst_nents,
+						       src_sg, src_nents,
+						       flags);
+}
+
 static inline bool dmaengine_is_metadata_mode_supported(struct dma_chan *chan,
 		enum dma_desc_metadata_mode mode)
 {
-- 
cgit v1.2.3


From b88dbe38dca82425a273d126785866af39ba0770 Mon Sep 17 00:00:00 2001
From: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
Date: Tue, 16 Nov 2021 14:38:35 +0000
Subject: media: uapi: Add VP9 stateless decoder controls

Add the VP9 stateless decoder controls plus the documentation that goes
with it.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Co-developed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Adrian Ratiu <adrian.ratiu@collabora.com>
Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
Co-developed-by: Daniel Almeida <daniel.almeida@collabora.com>
Signed-off-by: Daniel Almeida <daniel.almeida@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/media/v4l2-ctrls.h         |   4 +
 include/uapi/linux/v4l2-controls.h | 284 +++++++++++++++++++++++++++++++++++++
 include/uapi/linux/videodev2.h     |   6 +
 3 files changed, 294 insertions(+)

(limited to 'include')

diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index 575b59fbac77..b3ce438f1329 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -50,6 +50,8 @@ struct video_device;
  * @p_h264_decode_params:	Pointer to a struct v4l2_ctrl_h264_decode_params.
  * @p_h264_pred_weights:	Pointer to a struct v4l2_ctrl_h264_pred_weights.
  * @p_vp8_frame:		Pointer to a VP8 frame params structure.
+ * @p_vp9_compressed_hdr_probs:	Pointer to a VP9 frame compressed header probs structure.
+ * @p_vp9_frame:		Pointer to a VP9 frame params structure.
  * @p_hevc_sps:			Pointer to an HEVC sequence parameter set structure.
  * @p_hevc_pps:			Pointer to an HEVC picture parameter set structure.
  * @p_hevc_slice_params:	Pointer to an HEVC slice parameters structure.
@@ -80,6 +82,8 @@ union v4l2_ctrl_ptr {
 	struct v4l2_ctrl_hevc_sps *p_hevc_sps;
 	struct v4l2_ctrl_hevc_pps *p_hevc_pps;
 	struct v4l2_ctrl_hevc_slice_params *p_hevc_slice_params;
+	struct v4l2_ctrl_vp9_compressed_hdr *p_vp9_compressed_hdr_probs;
+	struct v4l2_ctrl_vp9_frame *p_vp9_frame;
 	struct v4l2_ctrl_hdr10_cll_info *p_hdr10_cll;
 	struct v4l2_ctrl_hdr10_mastering_display *p_hdr10_mastering;
 	struct v4l2_area *p_area;
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index c9eea93a43a9..c8e0f84d204d 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -2018,6 +2018,290 @@ struct v4l2_ctrl_hdr10_mastering_display {
 	__u32 min_display_mastering_luminance;
 };
 
+/* Stateless VP9 controls */
+
+#define V4L2_VP9_LOOP_FILTER_FLAG_DELTA_ENABLED	0x1
+#define	V4L2_VP9_LOOP_FILTER_FLAG_DELTA_UPDATE	0x2
+
+/**
+ * struct v4l2_vp9_loop_filter - VP9 loop filter parameters
+ *
+ * @ref_deltas: contains the adjustment needed for the filter level based on the
+ * chosen reference frame. If this syntax element is not present in the bitstream,
+ * users should pass its last value.
+ * @mode_deltas: contains the adjustment needed for the filter level based on the
+ * chosen mode.	If this syntax element is not present in the bitstream, users should
+ * pass its last value.
+ * @level: indicates the loop filter strength.
+ * @sharpness: indicates the sharpness level.
+ * @flags: combination of V4L2_VP9_LOOP_FILTER_FLAG_{} flags.
+ * @reserved: padding field. Should be zeroed by applications.
+ *
+ * This structure contains all loop filter related parameters. See sections
+ * '7.2.8 Loop filter semantics' of the VP9 specification for more details.
+ */
+struct v4l2_vp9_loop_filter {
+	__s8 ref_deltas[4];
+	__s8 mode_deltas[2];
+	__u8 level;
+	__u8 sharpness;
+	__u8 flags;
+	__u8 reserved[7];
+};
+
+/**
+ * struct v4l2_vp9_quantization - VP9 quantization parameters
+ *
+ * @base_q_idx: indicates the base frame qindex.
+ * @delta_q_y_dc: indicates the Y DC quantizer relative to base_q_idx.
+ * @delta_q_uv_dc: indicates the UV DC quantizer relative to base_q_idx.
+ * @delta_q_uv_ac: indicates the UV AC quantizer relative to base_q_idx.
+ * @reserved: padding field. Should be zeroed by applications.
+ *
+ * Encodes the quantization parameters. See section '7.2.9 Quantization params
+ * syntax' of the VP9 specification for more details.
+ */
+struct v4l2_vp9_quantization {
+	__u8 base_q_idx;
+	__s8 delta_q_y_dc;
+	__s8 delta_q_uv_dc;
+	__s8 delta_q_uv_ac;
+	__u8 reserved[4];
+};
+
+#define V4L2_VP9_SEGMENTATION_FLAG_ENABLED		0x01
+#define V4L2_VP9_SEGMENTATION_FLAG_UPDATE_MAP		0x02
+#define V4L2_VP9_SEGMENTATION_FLAG_TEMPORAL_UPDATE	0x04
+#define V4L2_VP9_SEGMENTATION_FLAG_UPDATE_DATA		0x08
+#define V4L2_VP9_SEGMENTATION_FLAG_ABS_OR_DELTA_UPDATE	0x10
+
+#define V4L2_VP9_SEG_LVL_ALT_Q				0
+#define V4L2_VP9_SEG_LVL_ALT_L				1
+#define V4L2_VP9_SEG_LVL_REF_FRAME			2
+#define V4L2_VP9_SEG_LVL_SKIP				3
+#define V4L2_VP9_SEG_LVL_MAX				4
+
+#define V4L2_VP9_SEGMENT_FEATURE_ENABLED(id)	(1 << (id))
+#define V4L2_VP9_SEGMENT_FEATURE_ENABLED_MASK	0xf
+
+/**
+ * struct v4l2_vp9_segmentation - VP9 segmentation parameters
+ *
+ * @feature_data: data attached to each feature. Data entry is only valid if
+ * the feature is enabled. The array shall be indexed with segment number as
+ * the first dimension (0..7) and one of V4L2_VP9_SEG_{} as the second dimension.
+ * @feature_enabled: bitmask defining which features are enabled in each segment.
+ * The value for each segment is a combination of V4L2_VP9_SEGMENT_FEATURE_ENABLED(id)
+ * values where id is one of V4L2_VP9_SEG_LVL_{}.
+ * @tree_probs: specifies the probability values to be used when decoding a
+ * Segment-ID. See '5.15. Segmentation map' section of the VP9 specification
+ * for more details.
+ * @pred_probs: specifies the probability values to be used when decoding a
+ * Predicted-Segment-ID. See '6.4.14. Get segment id syntax' section of :ref:`vp9`
+ * for more details.
+ * @flags: combination of V4L2_VP9_SEGMENTATION_FLAG_{} flags.
+ * @reserved: padding field. Should be zeroed by applications.
+ *
+ * Encodes the quantization parameters. See section '7.2.10 Segmentation params syntax' of
+ * the VP9 specification for more details.
+ */
+struct v4l2_vp9_segmentation {
+	__s16 feature_data[8][4];
+	__u8 feature_enabled[8];
+	__u8 tree_probs[7];
+	__u8 pred_probs[3];
+	__u8 flags;
+	__u8 reserved[5];
+};
+
+#define V4L2_VP9_FRAME_FLAG_KEY_FRAME			0x001
+#define V4L2_VP9_FRAME_FLAG_SHOW_FRAME			0x002
+#define V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT		0x004
+#define V4L2_VP9_FRAME_FLAG_INTRA_ONLY			0x008
+#define V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV		0x010
+#define V4L2_VP9_FRAME_FLAG_REFRESH_FRAME_CTX		0x020
+#define V4L2_VP9_FRAME_FLAG_PARALLEL_DEC_MODE		0x040
+#define V4L2_VP9_FRAME_FLAG_X_SUBSAMPLING		0x080
+#define V4L2_VP9_FRAME_FLAG_Y_SUBSAMPLING		0x100
+#define V4L2_VP9_FRAME_FLAG_COLOR_RANGE_FULL_SWING	0x200
+
+#define V4L2_VP9_SIGN_BIAS_LAST				0x1
+#define V4L2_VP9_SIGN_BIAS_GOLDEN			0x2
+#define V4L2_VP9_SIGN_BIAS_ALT				0x4
+
+#define V4L2_VP9_RESET_FRAME_CTX_NONE			0
+#define V4L2_VP9_RESET_FRAME_CTX_SPEC			1
+#define V4L2_VP9_RESET_FRAME_CTX_ALL			2
+
+#define V4L2_VP9_INTERP_FILTER_EIGHTTAP			0
+#define V4L2_VP9_INTERP_FILTER_EIGHTTAP_SMOOTH		1
+#define V4L2_VP9_INTERP_FILTER_EIGHTTAP_SHARP		2
+#define V4L2_VP9_INTERP_FILTER_BILINEAR			3
+#define V4L2_VP9_INTERP_FILTER_SWITCHABLE		4
+
+#define V4L2_VP9_REFERENCE_MODE_SINGLE_REFERENCE	0
+#define V4L2_VP9_REFERENCE_MODE_COMPOUND_REFERENCE	1
+#define V4L2_VP9_REFERENCE_MODE_SELECT			2
+
+#define V4L2_VP9_PROFILE_MAX				3
+
+#define V4L2_CID_STATELESS_VP9_FRAME	(V4L2_CID_CODEC_STATELESS_BASE + 300)
+/**
+ * struct v4l2_ctrl_vp9_frame - VP9 frame decoding control
+ *
+ * @lf: loop filter parameters. See &v4l2_vp9_loop_filter for more details.
+ * @quant: quantization parameters. See &v4l2_vp9_quantization for more details.
+ * @seg: segmentation parameters. See &v4l2_vp9_segmentation for more details.
+ * @flags: combination of V4L2_VP9_FRAME_FLAG_{} flags.
+ * @compressed_header_size: compressed header size in bytes.
+ * @uncompressed_header_size: uncompressed header size in bytes.
+ * @frame_width_minus_1: add 1 to it and you'll get the frame width expressed in pixels.
+ * @frame_height_minus_1: add 1 to it and you'll get the frame height expressed in pixels.
+ * @render_width_minus_1: add 1 to it and you'll get the expected render width expressed in
+ * pixels. This is not used during the decoding process but might be used by HW scalers
+ * to prepare a frame that's ready for scanout.
+ * @render_height_minus_1: add 1 to it and you'll get the expected render height expressed in
+ * pixels. This is not used during the decoding process but might be used by HW scalers
+ * to prepare a frame that's ready for scanout.
+ * @last_frame_ts: "last" reference buffer timestamp.
+ * The timestamp refers to the timestamp field in struct v4l2_buffer.
+ * Use v4l2_timeval_to_ns() to convert the struct timeval to a __u64.
+ * @golden_frame_ts: "golden" reference buffer timestamp.
+ * The timestamp refers to the timestamp field in struct v4l2_buffer.
+ * Use v4l2_timeval_to_ns() to convert the struct timeval to a __u64.
+ * @alt_frame_ts: "alt" reference buffer timestamp.
+ * The timestamp refers to the timestamp field in struct v4l2_buffer.
+ * Use v4l2_timeval_to_ns() to convert the struct timeval to a __u64.
+ * @ref_frame_sign_bias: a bitfield specifying whether the sign bias is set for a given
+ * reference frame. Either of V4L2_VP9_SIGN_BIAS_{}.
+ * @reset_frame_context: specifies whether the frame context should be reset to default values.
+ * Either of V4L2_VP9_RESET_FRAME_CTX_{}.
+ * @frame_context_idx: frame context that should be used/updated.
+ * @profile: VP9 profile. Can be 0, 1, 2 or 3.
+ * @bit_depth: bits per components. Can be 8, 10 or 12. Note that not all profiles support
+ * 10 and/or 12 bits depths.
+ * @interpolation_filter: specifies the filter selection used for performing inter prediction.
+ * Set to one of V4L2_VP9_INTERP_FILTER_{}.
+ * @tile_cols_log2: specifies the base 2 logarithm of the width of each tile (where the width
+ * is measured in units of 8x8 blocks). Shall be less than or equal to 6.
+ * @tile_rows_log2: specifies the base 2 logarithm of the height of each tile (where the height
+ * is measured in units of 8x8 blocks).
+ * @reference_mode: specifies the type of inter prediction to be used.
+ * Set to one of V4L2_VP9_REFERENCE_MODE_{}.
+ * @reserved: padding field. Should be zeroed by applications.
+ */
+struct v4l2_ctrl_vp9_frame {
+	struct v4l2_vp9_loop_filter lf;
+	struct v4l2_vp9_quantization quant;
+	struct v4l2_vp9_segmentation seg;
+	__u32 flags;
+	__u16 compressed_header_size;
+	__u16 uncompressed_header_size;
+	__u16 frame_width_minus_1;
+	__u16 frame_height_minus_1;
+	__u16 render_width_minus_1;
+	__u16 render_height_minus_1;
+	__u64 last_frame_ts;
+	__u64 golden_frame_ts;
+	__u64 alt_frame_ts;
+	__u8 ref_frame_sign_bias;
+	__u8 reset_frame_context;
+	__u8 frame_context_idx;
+	__u8 profile;
+	__u8 bit_depth;
+	__u8 interpolation_filter;
+	__u8 tile_cols_log2;
+	__u8 tile_rows_log2;
+	__u8 reference_mode;
+	__u8 reserved[7];
+};
+
+#define V4L2_VP9_NUM_FRAME_CTX	4
+
+/**
+ * struct v4l2_vp9_mv_probs - VP9 Motion vector probability updates
+ * @joint: motion vector joint probability updates.
+ * @sign: motion vector sign probability updates.
+ * @classes: motion vector class probability updates.
+ * @class0_bit: motion vector class0 bit probability updates.
+ * @bits: motion vector bits probability updates.
+ * @class0_fr: motion vector class0 fractional bit probability updates.
+ * @fr: motion vector fractional bit probability updates.
+ * @class0_hp: motion vector class0 high precision fractional bit probability updates.
+ * @hp: motion vector high precision fractional bit probability updates.
+ *
+ * This structure contains new values of motion vector probabilities.
+ * A value of zero in an array element means there is no update of the relevant probability.
+ * See `struct v4l2_vp9_prob_updates` for details.
+ */
+struct v4l2_vp9_mv_probs {
+	__u8 joint[3];
+	__u8 sign[2];
+	__u8 classes[2][10];
+	__u8 class0_bit[2];
+	__u8 bits[2][10];
+	__u8 class0_fr[2][2][3];
+	__u8 fr[2][3];
+	__u8 class0_hp[2];
+	__u8 hp[2];
+};
+
+#define V4L2_CID_STATELESS_VP9_COMPRESSED_HDR	(V4L2_CID_CODEC_STATELESS_BASE + 301)
+
+#define V4L2_VP9_TX_MODE_ONLY_4X4			0
+#define V4L2_VP9_TX_MODE_ALLOW_8X8			1
+#define V4L2_VP9_TX_MODE_ALLOW_16X16			2
+#define V4L2_VP9_TX_MODE_ALLOW_32X32			3
+#define V4L2_VP9_TX_MODE_SELECT				4
+
+/**
+ * struct v4l2_ctrl_vp9_compressed_hdr - VP9 probability updates control
+ * @tx_mode: specifies the TX mode. Set to one of V4L2_VP9_TX_MODE_{}.
+ * @tx8: TX 8x8 probability updates.
+ * @tx16: TX 16x16 probability updates.
+ * @tx32: TX 32x32 probability updates.
+ * @coef: coefficient probability updates.
+ * @skip: skip probability updates.
+ * @inter_mode: inter mode probability updates.
+ * @interp_filter: interpolation filter probability updates.
+ * @is_inter: is inter-block probability updates.
+ * @comp_mode: compound prediction mode probability updates.
+ * @single_ref: single ref probability updates.
+ * @comp_ref: compound ref probability updates.
+ * @y_mode: Y prediction mode probability updates.
+ * @uv_mode: UV prediction mode probability updates.
+ * @partition: partition probability updates.
+ * @mv: motion vector probability updates.
+ *
+ * This structure holds the probabilities update as parsed in the compressed
+ * header (Spec 6.3). These values represent the value of probability update after
+ * being translated with inv_map_table[] (see 6.3.5). A value of zero in an array element
+ * means that there is no update of the relevant probability.
+ *
+ * This control is optional and needs to be used when dealing with the hardware which is
+ * not capable of parsing the compressed header itself. Only drivers which need it will
+ * implement it.
+ */
+struct v4l2_ctrl_vp9_compressed_hdr {
+	__u8 tx_mode;
+	__u8 tx8[2][1];
+	__u8 tx16[2][2];
+	__u8 tx32[2][3];
+	__u8 coef[4][2][2][6][6][3];
+	__u8 skip[3];
+	__u8 inter_mode[7][3];
+	__u8 interp_filter[4][2];
+	__u8 is_inter[4];
+	__u8 comp_mode[5];
+	__u8 single_ref[5][2];
+	__u8 comp_ref[5];
+	__u8 y_mode[4][9];
+	__u8 uv_mode[10][9];
+	__u8 partition[16][3];
+
+	struct v4l2_vp9_mv_probs mv;
+};
+
 /* MPEG-compression definitions kept for backwards compatibility */
 #ifndef __KERNEL__
 #define V4L2_CTRL_CLASS_MPEG            V4L2_CTRL_CLASS_CODEC
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index f118fe7a9f58..df8b9c486ba1 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -703,6 +703,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_VP8      v4l2_fourcc('V', 'P', '8', '0') /* VP8 */
 #define V4L2_PIX_FMT_VP8_FRAME v4l2_fourcc('V', 'P', '8', 'F') /* VP8 parsed frame */
 #define V4L2_PIX_FMT_VP9      v4l2_fourcc('V', 'P', '9', '0') /* VP9 */
+#define V4L2_PIX_FMT_VP9_FRAME v4l2_fourcc('V', 'P', '9', 'F') /* VP9 parsed frame */
 #define V4L2_PIX_FMT_HEVC     v4l2_fourcc('H', 'E', 'V', 'C') /* HEVC aka H.265 */
 #define V4L2_PIX_FMT_FWHT     v4l2_fourcc('F', 'W', 'H', 'T') /* Fast Walsh Hadamard Transform (vicodec) */
 #define V4L2_PIX_FMT_FWHT_STATELESS     v4l2_fourcc('S', 'F', 'W', 'H') /* Stateless FWHT (vicodec) */
@@ -1759,6 +1760,8 @@ struct v4l2_ext_control {
 		struct v4l2_ctrl_mpeg2_sequence __user *p_mpeg2_sequence;
 		struct v4l2_ctrl_mpeg2_picture __user *p_mpeg2_picture;
 		struct v4l2_ctrl_mpeg2_quantisation __user *p_mpeg2_quantisation;
+		struct v4l2_ctrl_vp9_compressed_hdr __user *p_vp9_compressed_hdr_probs;
+		struct v4l2_ctrl_vp9_frame __user *p_vp9_frame;
 		void __user *ptr;
 	};
 } __attribute__ ((packed));
@@ -1823,6 +1826,9 @@ enum v4l2_ctrl_type {
 	V4L2_CTRL_TYPE_MPEG2_QUANTISATION   = 0x0250,
 	V4L2_CTRL_TYPE_MPEG2_SEQUENCE       = 0x0251,
 	V4L2_CTRL_TYPE_MPEG2_PICTURE        = 0x0252,
+
+	V4L2_CTRL_TYPE_VP9_COMPRESSED_HDR	= 0x0260,
+	V4L2_CTRL_TYPE_VP9_FRAME		= 0x0261,
 };
 
 /*  Used in the VIDIOC_QUERYCTRL ioctl for querying controls */
-- 
cgit v1.2.3


From 3e3b1fb0e5d95c0cd7278717185ca3fb00f5d771 Mon Sep 17 00:00:00 2001
From: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
Date: Tue, 16 Nov 2021 14:38:36 +0000
Subject: media: Add VP9 v4l2 library

Provide code common to vp9 drivers in one central location.

Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/media/v4l2-vp9.h | 233 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 233 insertions(+)
 create mode 100644 include/media/v4l2-vp9.h

(limited to 'include')

diff --git a/include/media/v4l2-vp9.h b/include/media/v4l2-vp9.h
new file mode 100644
index 000000000000..05478ad6d4ab
--- /dev/null
+++ b/include/media/v4l2-vp9.h
@@ -0,0 +1,233 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Helper functions for vp9 codecs.
+ *
+ * Copyright (c) 2021 Collabora, Ltd.
+ *
+ * Author: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
+ */
+
+#ifndef _MEDIA_V4L2_VP9_H
+#define _MEDIA_V4L2_VP9_H
+
+#include <media/v4l2-ctrls.h>
+
+/**
+ * struct v4l2_vp9_frame_mv_context - motion vector-related probabilities
+ *
+ * @joint: motion vector joint probabilities.
+ * @sign: motion vector sign probabilities.
+ * @classes: motion vector class probabilities.
+ * @class0_bit: motion vector class0 bit probabilities.
+ * @bits: motion vector bits probabilities.
+ * @class0_fr: motion vector class0 fractional bit probabilities.
+ * @fr: motion vector fractional bit probabilities.
+ * @class0_hp: motion vector class0 high precision fractional bit probabilities.
+ * @hp: motion vector high precision fractional bit probabilities.
+ *
+ * A member of v4l2_vp9_frame_context.
+ */
+struct v4l2_vp9_frame_mv_context {
+	u8 joint[3];
+	u8 sign[2];
+	u8 classes[2][10];
+	u8 class0_bit[2];
+	u8 bits[2][10];
+	u8 class0_fr[2][2][3];
+	u8 fr[2][3];
+	u8 class0_hp[2];
+	u8 hp[2];
+};
+
+/**
+ * struct v4l2_vp9_frame_context - frame probabilities, including motion-vector related
+ *
+ * @tx8: TX 8x8 probabilities.
+ * @tx16: TX 16x16 probabilities.
+ * @tx32: TX 32x32 probabilities.
+ * @coef: coefficient probabilities.
+ * @skip: skip probabilities.
+ * @inter_mode: inter mode probabilities.
+ * @interp_filter: interpolation filter probabilities.
+ * @is_inter: is inter-block probabilities.
+ * @comp_mode: compound prediction mode probabilities.
+ * @single_ref: single ref probabilities.
+ * @comp_ref: compound ref probabilities.
+ * @y_mode: Y prediction mode probabilities.
+ * @uv_mode: UV prediction mode probabilities.
+ * @partition: partition probabilities.
+ * @mv: motion vector probabilities.
+ *
+ * Drivers which need to keep track of frame context(s) can use this struct.
+ * The members correspond to probability tables, which are specified only implicitly in the
+ * vp9 spec. Section 10.5 "Default probability tables" contains all the types of involved
+ * tables, i.e. the actual tables are of the same kind, and when they are reset (which is
+ * mandated by the spec sometimes) they are overwritten with values from the default tables.
+ */
+struct v4l2_vp9_frame_context {
+	u8 tx8[2][1];
+	u8 tx16[2][2];
+	u8 tx32[2][3];
+	u8 coef[4][2][2][6][6][3];
+	u8 skip[3];
+	u8 inter_mode[7][3];
+	u8 interp_filter[4][2];
+	u8 is_inter[4];
+	u8 comp_mode[5];
+	u8 single_ref[5][2];
+	u8 comp_ref[5];
+	u8 y_mode[4][9];
+	u8 uv_mode[10][9];
+	u8 partition[16][3];
+
+	struct v4l2_vp9_frame_mv_context mv;
+};
+
+/**
+ * struct v4l2_vp9_frame_symbol_counts - pointers to arrays of symbol counts
+ *
+ * @partition: partition counts.
+ * @skip: skip counts.
+ * @intra_inter: is inter-block counts.
+ * @tx32p: TX32 counts.
+ * @tx16p: TX16 counts.
+ * @tx8p: TX8 counts.
+ * @y_mode: Y prediction mode counts.
+ * @uv_mode: UV prediction mode counts.
+ * @comp: compound prediction mode counts.
+ * @comp_ref: compound ref counts.
+ * @single_ref: single ref counts.
+ * @mv_mode: inter mode counts.
+ * @filter: interpolation filter counts.
+ * @mv_joint: motion vector joint counts.
+ * @sign: motion vector sign counts.
+ * @classes: motion vector class counts.
+ * @class0: motion vector class0 bit counts.
+ * @bits: motion vector bits counts.
+ * @class0_fp: motion vector class0 fractional bit counts.
+ * @fp: motion vector fractional bit counts.
+ * @class0_hp: motion vector class0 high precision fractional bit counts.
+ * @hp: motion vector high precision fractional bit counts.
+ * @coeff: coefficient counts.
+ * @eob: eob counts
+ *
+ * The fields correspond to what is specified in section 8.3 "Clear counts process" of the spec.
+ * Different pieces of hardware can report the counts in different order, so we cannot rely on
+ * simply overlaying a struct on a relevant block of memory. Instead we provide pointers to
+ * arrays or array of pointers to arrays in case of coeff, or array of pointers for eob.
+ */
+struct v4l2_vp9_frame_symbol_counts {
+	u32 (*partition)[16][4];
+	u32 (*skip)[3][2];
+	u32 (*intra_inter)[4][2];
+	u32 (*tx32p)[2][4];
+	u32 (*tx16p)[2][4];
+	u32 (*tx8p)[2][2];
+	u32 (*y_mode)[4][10];
+	u32 (*uv_mode)[10][10];
+	u32 (*comp)[5][2];
+	u32 (*comp_ref)[5][2];
+	u32 (*single_ref)[5][2][2];
+	u32 (*mv_mode)[7][4];
+	u32 (*filter)[4][3];
+	u32 (*mv_joint)[4];
+	u32 (*sign)[2][2];
+	u32 (*classes)[2][11];
+	u32 (*class0)[2][2];
+	u32 (*bits)[2][10][2];
+	u32 (*class0_fp)[2][2][4];
+	u32 (*fp)[2][4];
+	u32 (*class0_hp)[2][2];
+	u32 (*hp)[2][2];
+	u32 (*coeff[4][2][2][6][6])[3];
+	u32 *eob[4][2][2][6][6][2];
+};
+
+extern const u8 v4l2_vp9_kf_y_mode_prob[10][10][9]; /* Section 10.4 of the spec */
+extern const u8 v4l2_vp9_kf_partition_probs[16][3]; /* Section 10.4 of the spec */
+extern const u8 v4l2_vp9_kf_uv_mode_prob[10][9]; /* Section 10.4 of the spec */
+extern const struct v4l2_vp9_frame_context v4l2_vp9_default_probs; /* Section 10.5 of the spec */
+
+/**
+ * v4l2_vp9_fw_update_probs() - Perform forward update of vp9 probabilities
+ *
+ * @probs: current probabilities values
+ * @deltas: delta values from compressed header
+ * @dec_params: vp9 frame decoding parameters
+ *
+ * This function performs forward updates of probabilities for the vp9 boolean decoder.
+ * The frame header can contain a directive to update the probabilities (deltas), if so, then
+ * the deltas are provided in the header, too. The userspace parses those and passes the said
+ * deltas struct to the kernel.
+ */
+void v4l2_vp9_fw_update_probs(struct v4l2_vp9_frame_context *probs,
+			      const struct v4l2_ctrl_vp9_compressed_hdr *deltas,
+			      const struct v4l2_ctrl_vp9_frame *dec_params);
+
+/**
+ * v4l2_vp9_reset_frame_ctx() - Reset appropriate frame context
+ *
+ * @dec_params: vp9 frame decoding parameters
+ * @frame_context: array of the 4 frame contexts
+ *
+ * This function resets appropriate frame contexts, based on what's in dec_params.
+ *
+ * Returns the frame context index after the update, which might be reset to zero if
+ * mandated by the spec.
+ */
+u8 v4l2_vp9_reset_frame_ctx(const struct v4l2_ctrl_vp9_frame *dec_params,
+			    struct v4l2_vp9_frame_context *frame_context);
+
+/**
+ * v4l2_vp9_adapt_coef_probs() - Perform backward update of vp9 coefficients probabilities
+ *
+ * @probs: current probabilities values
+ * @counts: values of symbol counts after the current frame has been decoded
+ * @use_128: flag to request that 128 is used as update factor if true, otherwise 112 is used
+ * @frame_is_intra: flag indicating that FrameIsIntra is true
+ *
+ * This function performs backward updates of coefficients probabilities for the vp9 boolean
+ * decoder. After a frame has been decoded the counts of how many times a given symbol has
+ * occurred are known and are used to update the probability of each symbol.
+ */
+void v4l2_vp9_adapt_coef_probs(struct v4l2_vp9_frame_context *probs,
+			       struct v4l2_vp9_frame_symbol_counts *counts,
+			       bool use_128,
+			       bool frame_is_intra);
+
+/**
+ * v4l2_vp9_adapt_noncoef_probs() - Perform backward update of vp9 non-coefficients probabilities
+ *
+ * @probs: current probabilities values
+ * @counts: values of symbol counts after the current frame has been decoded
+ * @reference_mode: specifies the type of inter prediction to be used. See
+ *	&v4l2_vp9_reference_mode for more details
+ * @interpolation_filter: specifies the filter selection used for performing inter prediction.
+ *	See &v4l2_vp9_interpolation_filter for more details
+ * @tx_mode: specifies the TX mode. See &v4l2_vp9_tx_mode for more details
+ * @flags: combination of V4L2_VP9_FRAME_FLAG_* flags
+ *
+ * This function performs backward updates of non-coefficients probabilities for the vp9 boolean
+ * decoder. After a frame has been decoded the counts of how many times a given symbol has
+ * occurred are known and are used to update the probability of each symbol.
+ */
+void v4l2_vp9_adapt_noncoef_probs(struct v4l2_vp9_frame_context *probs,
+				  struct v4l2_vp9_frame_symbol_counts *counts,
+				  u8 reference_mode, u8 interpolation_filter, u8 tx_mode,
+				  u32 flags);
+
+/**
+ * v4l2_vp9_seg_feat_enabled() - Check if a segmentation feature is enabled
+ *
+ * @feature_enabled: array of 8-bit flags (for all segments)
+ * @feature: id of the feature to check
+ * @segid: id of the segment to look up
+ *
+ * This function returns true if a given feature is active in a given segment.
+ */
+bool
+v4l2_vp9_seg_feat_enabled(const u8 *feature_enabled,
+			  unsigned int feature,
+			  unsigned int segid);
+
+#endif /* _MEDIA_V4L2_VP9_H */
-- 
cgit v1.2.3


From 16e0c2474fcfaa8a6c61abfaa981994c4564a628 Mon Sep 17 00:00:00 2001
From: Sam Protsenko <semen.protsenko@linaro.org>
Date: Mon, 22 Nov 2021 01:27:36 +0200
Subject: dt-bindings: clock: Add bindings for Exynos850 CMU_APM

CMU_APM generates clocks for APM IP-core (Active Power Management). In
particular it generates RTC clocks, which are needed to enable rtc-s3c
driver on Exynos850 SoC.

Add clock indices and binding documentation for CMU_APM.

Signed-off-by: Sam Protsenko <semen.protsenko@linaro.org>
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Link: https://lore.kernel.org/r/20211121232741.6967-2-semen.protsenko@linaro.org
---
 include/dt-bindings/clock/exynos850.h | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/exynos850.h b/include/dt-bindings/clock/exynos850.h
index 8999184f94a2..df3978b58304 100644
--- a/include/dt-bindings/clock/exynos850.h
+++ b/include/dt-bindings/clock/exynos850.h
@@ -55,7 +55,34 @@
 #define CLK_GOUT_PERI_BUS		43
 #define CLK_GOUT_PERI_UART		44
 #define CLK_GOUT_PERI_IP		45
-#define TOP_NR_CLK			46
+#define CLK_MOUT_CLKCMU_APM_BUS		46
+#define CLK_DOUT_CLKCMU_APM_BUS		47
+#define CLK_GOUT_CLKCMU_APM_BUS		48
+#define TOP_NR_CLK			49
+
+/* CMU_APM */
+#define CLK_RCO_I3C_PMIC		1
+#define OSCCLK_RCO_APM			2
+#define CLK_RCO_APM__ALV		3
+#define CLK_DLL_DCO			4
+#define CLK_MOUT_APM_BUS_USER		5
+#define CLK_MOUT_RCO_APM_I3C_USER	6
+#define CLK_MOUT_RCO_APM_USER		7
+#define CLK_MOUT_DLL_USER		8
+#define CLK_MOUT_CLKCMU_CHUB_BUS	9
+#define CLK_MOUT_APM_BUS		10
+#define CLK_MOUT_APM_I3C		11
+#define CLK_DOUT_CLKCMU_CHUB_BUS	12
+#define CLK_DOUT_APM_BUS		13
+#define CLK_DOUT_APM_I3C		14
+#define CLK_GOUT_CLKCMU_CMGP_BUS	15
+#define CLK_GOUT_CLKCMU_CHUB_BUS	16
+#define CLK_GOUT_RTC_PCLK		17
+#define CLK_GOUT_TOP_RTC_PCLK		18
+#define CLK_GOUT_I3C_PCLK		19
+#define CLK_GOUT_I3C_SCLK		20
+#define CLK_GOUT_SPEEDY_PCLK		21
+#define APM_NR_CLK			22
 
 /* CMU_HSI */
 #define CLK_MOUT_HSI_BUS_USER		1
-- 
cgit v1.2.3


From c2afeb79fdb24de4cea73f12d2ede84a5a68fa08 Mon Sep 17 00:00:00 2001
From: Sam Protsenko <semen.protsenko@linaro.org>
Date: Mon, 22 Nov 2021 01:27:38 +0200
Subject: dt-bindings: clock: Add bindings for Exynos850 CMU_CMGP

CMU_CMGP generates USI and ADC clocks for BLK_ALIVE. In particular USI
clocks are needed for HSI2C_3 and HSI2C_4 instances.

Add clock indices and bindings documentation for CMU_CMGP domain.

Signed-off-by: Sam Protsenko <semen.protsenko@linaro.org>
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Link: https://lore.kernel.org/r/20211121232741.6967-4-semen.protsenko@linaro.org
---
 include/dt-bindings/clock/exynos850.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/exynos850.h b/include/dt-bindings/clock/exynos850.h
index df3978b58304..8aa5e82af0d3 100644
--- a/include/dt-bindings/clock/exynos850.h
+++ b/include/dt-bindings/clock/exynos850.h
@@ -84,6 +84,23 @@
 #define CLK_GOUT_SPEEDY_PCLK		21
 #define APM_NR_CLK			22
 
+/* CMU_CMGP */
+#define CLK_RCO_CMGP			1
+#define CLK_MOUT_CMGP_ADC		2
+#define CLK_MOUT_CMGP_USI0		3
+#define CLK_MOUT_CMGP_USI1		4
+#define CLK_DOUT_CMGP_ADC		5
+#define CLK_DOUT_CMGP_USI0		6
+#define CLK_DOUT_CMGP_USI1		7
+#define CLK_GOUT_CMGP_ADC_S0_PCLK	8
+#define CLK_GOUT_CMGP_ADC_S1_PCLK	9
+#define CLK_GOUT_CMGP_GPIO_PCLK		10
+#define CLK_GOUT_CMGP_USI0_IPCLK	11
+#define CLK_GOUT_CMGP_USI0_PCLK		12
+#define CLK_GOUT_CMGP_USI1_IPCLK	13
+#define CLK_GOUT_CMGP_USI1_PCLK		14
+#define CMGP_NR_CLK			15
+
 /* CMU_HSI */
 #define CLK_MOUT_HSI_BUS_USER		1
 #define CLK_MOUT_HSI_MMC_CARD_USER	2
-- 
cgit v1.2.3


From 448f413a8bdc727d25d9a786ccbdb974fb85d973 Mon Sep 17 00:00:00 2001
From: Hao Chen <chenhao288@hisilicon.com>
Date: Thu, 18 Nov 2021 20:12:40 +0800
Subject: ethtool: add support to set/get tx copybreak buf size via ethtool

Add support for ethtool to set/get tx copybreak buf size.

Signed-off-by: Hao Chen <chenhao288@hisilicon.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index a2223b685451..7bc4b8def12c 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -231,6 +231,7 @@ enum tunable_id {
 	ETHTOOL_RX_COPYBREAK,
 	ETHTOOL_TX_COPYBREAK,
 	ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */
+	ETHTOOL_TX_COPYBREAK_BUF_SIZE,
 	/*
 	 * Add your fresh new tunable attribute above and remember to update
 	 * tunable_strings[] in net/ethtool/common.c
-- 
cgit v1.2.3


From 0b70c256eba8448b072d25c95ee65e59da8970de Mon Sep 17 00:00:00 2001
From: Hao Chen <chenhao288@hisilicon.com>
Date: Thu, 18 Nov 2021 20:12:42 +0800
Subject: ethtool: add support to set/get rx buf len via ethtool

Add support to set rx buf len via ethtool -G parameter and get
rx buf len via ethtool -g parameter.

Signed-off-by: Hao Chen <chenhao288@hisilicon.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h              | 18 ++++++++++++++++++
 include/uapi/linux/ethtool_netlink.h |  1 +
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 845a0ffc16ee..0b252b82988b 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -67,6 +67,22 @@ enum {
 	ETH_RSS_HASH_FUNCS_COUNT
 };
 
+/**
+ * struct kernel_ethtool_ringparam - RX/TX ring configuration
+ * @rx_buf_len: Current length of buffers on the rx ring.
+ */
+struct kernel_ethtool_ringparam {
+	u32	rx_buf_len;
+};
+
+/**
+ * enum ethtool_supported_ring_param - indicator caps for setting ring params
+ * @ETHTOOL_RING_USE_RX_BUF_LEN: capture for setting rx_buf_len
+ */
+enum ethtool_supported_ring_param {
+	ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0),
+};
+
 #define __ETH_RSS_HASH_BIT(bit)	((u32)1 << (bit))
 #define __ETH_RSS_HASH(name)	__ETH_RSS_HASH_BIT(ETH_RSS_HASH_##name##_BIT)
 
@@ -432,6 +448,7 @@ struct ethtool_module_power_mode_params {
  * @cap_link_lanes_supported: indicates if the driver supports lanes
  *	parameter.
  * @supported_coalesce_params: supported types of interrupt coalescing.
+ * @supported_ring_params: supported ring params.
  * @get_drvinfo: Report driver/device information.  Should only set the
  *	@driver, @version, @fw_version and @bus_info fields.  If not
  *	implemented, the @driver and @bus_info fields will be filled in
@@ -613,6 +630,7 @@ struct ethtool_module_power_mode_params {
 struct ethtool_ops {
 	u32     cap_link_lanes_supported:1;
 	u32	supported_coalesce_params;
+	u32	supported_ring_params;
 	void	(*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *);
 	int	(*get_regs_len)(struct net_device *);
 	void	(*get_regs)(struct net_device *, struct ethtool_regs *, void *);
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 999777d32dcf..cca6e474a085 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -329,6 +329,7 @@ enum {
 	ETHTOOL_A_RINGS_RX_MINI,			/* u32 */
 	ETHTOOL_A_RINGS_RX_JUMBO,			/* u32 */
 	ETHTOOL_A_RINGS_TX,				/* u32 */
+	ETHTOOL_A_RINGS_RX_BUF_LEN,                     /* u32 */
 
 	/* add new constants above here */
 	__ETHTOOL_A_RINGS_CNT,
-- 
cgit v1.2.3


From 7462494408cd3de8b0bc1e79670bf213288501d0 Mon Sep 17 00:00:00 2001
From: Hao Chen <chenhao288@hisilicon.com>
Date: Thu, 18 Nov 2021 20:12:43 +0800
Subject: ethtool: extend ringparam setting/getting API with rx_buf_len

Add two new parameters kernel_ringparam and extack for
.get_ringparam and .set_ringparam to extend more ring params
through netlink.

Signed-off-by: Hao Chen <chenhao288@hisilicon.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 0b252b82988b..a26f37a27167 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -656,9 +656,13 @@ struct ethtool_ops {
 				struct kernel_ethtool_coalesce *,
 				struct netlink_ext_ack *);
 	void	(*get_ringparam)(struct net_device *,
-				 struct ethtool_ringparam *);
+				 struct ethtool_ringparam *,
+				 struct kernel_ethtool_ringparam *,
+				 struct netlink_ext_ack *);
 	int	(*set_ringparam)(struct net_device *,
-				 struct ethtool_ringparam *);
+				 struct ethtool_ringparam *,
+				 struct kernel_ethtool_ringparam *,
+				 struct netlink_ext_ack *);
 	void	(*get_pause_stats)(struct net_device *dev,
 				   struct ethtool_pause_stats *pause_stats);
 	void	(*get_pauseparam)(struct net_device *,
-- 
cgit v1.2.3


From 4b66d2161b8125b6caa6971815e85631cf3cf36f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 19 Nov 2021 07:43:31 -0800
Subject: net: annotate accesses to dev->gso_max_size

dev->gso_max_size is written under RTNL protection, or when the device is
not yet visible, but is read locklessly.

Add the READ_ONCE()/WRITE_ONCE() pairs, and use netif_set_gso_max_size()
where we can to better document what is going on.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cb7f2661d187..14eeb58ed197 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4731,7 +4731,8 @@ static inline bool netif_needs_gso(struct sk_buff *skb,
 static inline void netif_set_gso_max_size(struct net_device *dev,
 					  unsigned int size)
 {
-	dev->gso_max_size = size;
+	/* dev->gso_max_size is read locklessly from sk_setup_caps() */
+	WRITE_ONCE(dev->gso_max_size, size);
 }
 
 static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol,
-- 
cgit v1.2.3


From 6d872df3e3b91532b142de9044e5b4984017a55f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 19 Nov 2021 07:43:32 -0800
Subject: net: annotate accesses to dev->gso_max_segs

dev->gso_max_segs is written under RTNL protection, or when the device is
not yet visible, but is read locklessly.

Add netif_set_gso_max_segs() helper.

Add the READ_ONCE()/WRITE_ONCE() pairs, and use netif_set_gso_max_segs()
where we can to better document what is going on.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 14eeb58ed197..df049864661d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4735,6 +4735,13 @@ static inline void netif_set_gso_max_size(struct net_device *dev,
 	WRITE_ONCE(dev->gso_max_size, size);
 }
 
+static inline void netif_set_gso_max_segs(struct net_device *dev,
+					  unsigned int segs)
+{
+	/* dev->gso_max_segs is read locklessly from sk_setup_caps() */
+	WRITE_ONCE(dev->gso_max_segs, segs);
+}
+
 static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol,
 					int pulled_hlen, u16 mac_offset,
 					int mac_len)
-- 
cgit v1.2.3


From 291dcae39bc482f7edc91a50d97027327e0cf5f9 Mon Sep 17 00:00:00 2001
From: Sean Anderson <sean.anderson@seco.com>
Date: Fri, 19 Nov 2021 10:58:09 -0500
Subject: net: phylink: Add helpers for c22 registers without MDIO

Some devices expose memory-mapped c22-compliant PHYs. Because these
devices do not have an MDIO bus, we cannot use the existing helpers.
Refactor the existing helpers to allow supplying the values for c22
registers directly, instead of using MDIO to access them. Only get_state
and set_advertisement are converted, since they contain the most complex
logic. Because set_advertisement is never actually used outside
phylink_mii_c22_pcs_config, move the MDIO-writing part into that
function. Because some modes do not need the advertisement register set
at all, we use -EINVAL for this purpose.

Additionally, a new function phylink_pcs_enable_an is provided to
determine whether to enable autonegotiation.

Signed-off-by: Sean Anderson <sean.anderson@seco.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phylink.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 3563820a1765..01224235df0f 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -527,11 +527,12 @@ void phylink_set_port_modes(unsigned long *bits);
 void phylink_set_10g_modes(unsigned long *mask);
 void phylink_helper_basex_speed(struct phylink_link_state *state);
 
+void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state,
+				      u16 bmsr, u16 lpa);
 void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs,
 				   struct phylink_link_state *state);
-int phylink_mii_c22_pcs_set_advertisement(struct mdio_device *pcs,
-					  phy_interface_t interface,
-					  const unsigned long *advertising);
+int phylink_mii_c22_pcs_encode_advertisement(phy_interface_t interface,
+					     const unsigned long *advertising);
 int phylink_mii_c22_pcs_config(struct mdio_device *pcs, unsigned int mode,
 			       phy_interface_t interface,
 			       const unsigned long *advertising);
-- 
cgit v1.2.3


From c4804670026b93f4ebddda30af89fd737bf93931 Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@linux.intel.com>
Date: Sat, 20 Nov 2021 21:51:54 +0530
Subject: net: wwan: common debugfs base dir for wwan device

This patch set brings in a common debugfs base directory
i.e. /sys/kernel/debugfs/wwan/ in WWAN Subsystem for a
WWAN device instance. So that it avoids driver polluting
debugfs root with unrelated directories & possible name
collusion.

Having a common debugfs base directory for WWAN drivers
eases user to match control devices with debugfs entries.

WWAN Subsystem creates dentry (/sys/kernel/debugfs/wwan)
on module load & removes dentry on module unload.

When driver registers a new wwan device, dentry (wwanX)
is created for WWAN device instance & on driver unregister
dentry is removed.

New API is introduced to return the wwan device instance
dentry so that driver can create debugfs entries under it.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@linux.intel.com>
Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/wwan.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/wwan.h b/include/linux/wwan.h
index 9fac819f92e3..1646aa3e6779 100644
--- a/include/linux/wwan.h
+++ b/include/linux/wwan.h
@@ -171,4 +171,6 @@ int wwan_register_ops(struct device *parent, const struct wwan_ops *ops,
 
 void wwan_unregister_ops(struct device *parent);
 
+struct dentry *wwan_get_debugfs_dir(struct device *parent);
+
 #endif /* __WWAN_H */
-- 
cgit v1.2.3


From cb902b332f9545635911063b671927defa5866bf Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Mon, 22 Nov 2021 15:24:56 +0100
Subject: sections: global data can be in .bss

When checking an address is located in a global data section also check
for the .bss section as global variables initialized to 0 can be in
there (-fzero-initialized-in-bss).

This was found when looking at ensure_safe_net_sysctl which was failing
to detect non-init sysctl pointing to a global data section when the
data was in the .bss section.

Signed-off-by: Antoine Tenart <atenart@kernel.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/asm-generic/sections.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 1dfadb2e878d..76a0f16e56cf 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -130,18 +130,24 @@ static inline bool init_section_intersects(void *virt, size_t size)
 
 /**
  * is_kernel_core_data - checks if the pointer address is located in the
- *			 .data section
+ *			 .data or .bss section
  *
  * @addr: address to check
  *
- * Returns: true if the address is located in .data, false otherwise.
+ * Returns: true if the address is located in .data or .bss, false otherwise.
  * Note: On some archs it may return true for core RODATA, and false
  *       for others. But will always be true for core RW data.
  */
 static inline bool is_kernel_core_data(unsigned long addr)
 {
-	return addr >= (unsigned long)_sdata &&
-	       addr < (unsigned long)_edata;
+	if (addr >= (unsigned long)_sdata && addr < (unsigned long)_edata)
+		return true;
+
+	if (addr >= (unsigned long)__bss_start &&
+	    addr < (unsigned long)__bss_stop)
+		return true;
+
+	return false;
 }
 
 /**
-- 
cgit v1.2.3


From 6deb3fb22da19e21ca5372fb34fb868bfceaf74c Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Tue, 9 Nov 2021 20:56:56 +0800
Subject: clk: imx8mp: Remove IPG_AUDIO_ROOT from imx8mp-clock.h

Since the commit b24e288d5063 ("clk: imx: Remove the audio ipg clock
from imx8mp") removes the non-existing IPG_AUDIO_ROOT from the
clk-imx8mp.c, and this definition is not used by anywhere, let us
removed it in the imx8mp-clock.h as well.

Signed-off-by: Hui Wang <hui.wang@canonical.com>
Reviewed-by: Abel Vesa <abel.vesa@nxp.com>
Link: https://lore.kernel.org/r/20211109125657.63485-1-hui.wang@canonical.com
Signed-off-by: Abel Vesa <abel.vesa@nxp.com>
---
 include/dt-bindings/clock/imx8mp-clock.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/imx8mp-clock.h b/include/dt-bindings/clock/imx8mp-clock.h
index 43927a1b9e94..235c7a00d379 100644
--- a/include/dt-bindings/clock/imx8mp-clock.h
+++ b/include/dt-bindings/clock/imx8mp-clock.h
@@ -117,7 +117,6 @@
 #define IMX8MP_CLK_AUDIO_AHB			108
 #define IMX8MP_CLK_MIPI_DSI_ESC_RX		109
 #define IMX8MP_CLK_IPG_ROOT			110
-#define IMX8MP_CLK_IPG_AUDIO_ROOT		111
 #define IMX8MP_CLK_DRAM_ALT			112
 #define IMX8MP_CLK_DRAM_APB			113
 #define IMX8MP_CLK_VPU_G1			114
-- 
cgit v1.2.3


From fba84957e2e2e201cf4e352efe0c7cac0fbb5d5d Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 20 Nov 2021 16:31:48 -0800
Subject: skbuff: Move conditional preprocessor directives out of struct
 sk_buff

In preparation for using the struct_group() macro in struct sk_buff,
move the conditional preprocessor directives out of the region of struct
sk_buff that will be enclosed by struct_group(). While GCC and Clang are
happy with conditional preprocessor directives here, sparse is not, even
under -Wno-directive-within-macro[1], as would be seen under a C=1 build:

net/core/filter.c: note: in included file (through include/linux/netlink.h, include/linux/sock_diag.h):
./include/linux/skbuff.h:820:1: warning: directive in macro's argument list
./include/linux/skbuff.h:822:1: warning: directive in macro's argument list
./include/linux/skbuff.h:846:1: warning: directive in macro's argument list
./include/linux/skbuff.h:848:1: warning: directive in macro's argument list

Additionally remove empty macro argument definitions and usage.

"objdump -d" shows no object code differences.

[1] https://www.spinics.net/lists/linux-sparse/msg10857.html

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 059b6266dcd7..c7889afe0d0d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -795,7 +795,7 @@ struct sk_buff {
 #else
 #define CLONED_MASK	1
 #endif
-#define CLONED_OFFSET()		offsetof(struct sk_buff, __cloned_offset)
+#define CLONED_OFFSET		offsetof(struct sk_buff, __cloned_offset)
 
 	/* private: */
 	__u8			__cloned_offset[0];
@@ -818,18 +818,10 @@ struct sk_buff {
 	__u32			headers_start[0];
 	/* public: */
 
-/* if you move pkt_type around you also must adapt those constants */
-#ifdef __BIG_ENDIAN_BITFIELD
-#define PKT_TYPE_MAX	(7 << 5)
-#else
-#define PKT_TYPE_MAX	7
-#endif
-#define PKT_TYPE_OFFSET()	offsetof(struct sk_buff, __pkt_type_offset)
-
 	/* private: */
 	__u8			__pkt_type_offset[0];
 	/* public: */
-	__u8			pkt_type:3;
+	__u8			pkt_type:3; /* see PKT_TYPE_MAX */
 	__u8			ignore_df:1;
 	__u8			nf_trace:1;
 	__u8			ip_summed:2;
@@ -845,16 +837,10 @@ struct sk_buff {
 	__u8			encap_hdr_csum:1;
 	__u8			csum_valid:1;
 
-#ifdef __BIG_ENDIAN_BITFIELD
-#define PKT_VLAN_PRESENT_BIT	7
-#else
-#define PKT_VLAN_PRESENT_BIT	0
-#endif
-#define PKT_VLAN_PRESENT_OFFSET()	offsetof(struct sk_buff, __pkt_vlan_present_offset)
 	/* private: */
 	__u8			__pkt_vlan_present_offset[0];
 	/* public: */
-	__u8			vlan_present:1;
+	__u8			vlan_present:1;	/* See PKT_VLAN_PRESENT_BIT */
 	__u8			csum_complete_sw:1;
 	__u8			csum_level:2;
 	__u8			csum_not_inet:1;
@@ -953,6 +939,22 @@ struct sk_buff {
 #endif
 };
 
+/* if you move pkt_type around you also must adapt those constants */
+#ifdef __BIG_ENDIAN_BITFIELD
+#define PKT_TYPE_MAX	(7 << 5)
+#else
+#define PKT_TYPE_MAX	7
+#endif
+#define PKT_TYPE_OFFSET		offsetof(struct sk_buff, __pkt_type_offset)
+
+/* if you move pkt_vlan_present around you also must adapt these constants */
+#ifdef __BIG_ENDIAN_BITFIELD
+#define PKT_VLAN_PRESENT_BIT	7
+#else
+#define PKT_VLAN_PRESENT_BIT	0
+#endif
+#define PKT_VLAN_PRESENT_OFFSET	offsetof(struct sk_buff, __pkt_vlan_present_offset)
+
 #ifdef __KERNEL__
 /*
  *	Handling routines are only of interest to the kernel
-- 
cgit v1.2.3


From 03f61041c17914355dde7261be9ccdc821ddd454 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 20 Nov 2021 16:31:49 -0800
Subject: skbuff: Switch structure bounds to struct_group()

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring fields.

Replace the existing empty member position markers "headers_start" and
"headers_end" with a struct_group(). This will allow memcpy() and sizeof()
to more easily reason about sizes, and improve readability.

"pahole" shows no size nor member offset changes to struct sk_buff.
"objdump -d" shows no object code changes (outside of WARNs affected by
source line number changes).

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com> # drivers/net/wireguard/*
Link: https://lore.kernel.org/lkml/20210728035006.GD35706@embeddedor
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c7889afe0d0d..eba256af64a5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -811,12 +811,10 @@ struct sk_buff {
 	__u8			active_extensions;
 #endif
 
-	/* fields enclosed in headers_start/headers_end are copied
+	/* Fields enclosed in headers group are copied
 	 * using a single memcpy() in __copy_skb_header()
 	 */
-	/* private: */
-	__u32			headers_start[0];
-	/* public: */
+	struct_group(headers,
 
 	/* private: */
 	__u8			__pkt_type_offset[0];
@@ -921,9 +919,7 @@ struct sk_buff {
 	u64			kcov_handle;
 #endif
 
-	/* private: */
-	__u32			headers_end[0];
-	/* public: */
+	); /* end headers group */
 
 	/* These elements must be at the end, see alloc_skb() for details.  */
 	sk_buff_data_t		tail;
-- 
cgit v1.2.3


From 28c916ade1bd4205958f74bb817fd3a05dbb7afc Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 18 Nov 2021 16:30:14 +0100
Subject: ASoC: soc-acpi: Set mach->id field on comp_ids matches

Commit dac7cbd55dca ("ASoC: Intel: soc-acpi-byt: shrink tables using
compatible IDs") and commit 959ae8215a9e ("ASoC: Intel: soc-acpi-cht:
shrink tables using compatible IDs") simplified the match tables in
soc-acpi-intel-byt-match.c and soc-acpi-intel-cht-match.c by merging
identical entries using the new .comp_ids snd_soc_acpi_mach field to
point a single entry to multiple ACPI HIDs and clearing the previously
unique per entry .id field.

But various machine drivers from sound/soc/intel/boards rely on mach->id
in one or more ways, e.g. some drivers contain the following snippets:

	adev = acpi_dev_get_first_match_dev(mach->id, NULL, -1);

	pkg_found = snd_soc_acpi_find_package_from_hid(mach->id, ...

	if (!strncmp(snd_soc_cards[i].codec_id, mach->id, 8)) { ...

All of which are broken by the match table shrinking.

Make the snd_soc_acpi_mach.id field non const (the storage for the tables
already is non const) and on a comps_ids match copy the matching HID to
the id field to fix this.

Fixes: dac7cbd55dca ("ASoC: Intel: soc-acpi-byt: shrink tables using compatible IDs")
Fixes: 959ae8215a9e ("ASoC: Intel: soc-acpi-cht: shrink tables using compatible IDs")
Suggested-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Cc: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Cc: Brent Lu <brent.lu@intel.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20211118153014.349222-1-hdegoede@redhat.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-acpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h
index 31f4c4f9aeea..ac0893df9c76 100644
--- a/include/sound/soc-acpi.h
+++ b/include/sound/soc-acpi.h
@@ -147,7 +147,7 @@ struct snd_soc_acpi_link_adr {
  */
 /* Descriptor for SST ASoC machine driver */
 struct snd_soc_acpi_mach {
-	const u8 id[ACPI_ID_LEN];
+	u8 id[ACPI_ID_LEN];
 	const struct snd_soc_acpi_codecs *comp_ids;
 	const u32 link_mask;
 	const struct snd_soc_acpi_link_adr *links;
-- 
cgit v1.2.3


From 8837cbbf854246f5f4d565f21e6baa945d37aded Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Mon, 22 Nov 2021 17:15:12 +0200
Subject: net: ipv6: add fib6_nh_release_dsts stub

We need a way to release a fib6_nh's per-cpu dsts when replacing
nexthops otherwise we can end up with stale per-cpu dsts which hold net
device references, so add a new IPv6 stub called fib6_nh_release_dsts.
It must be used after an RCU grace period, so no new dsts can be created
through a group's nexthop entry.
Similar to fib6_nh_release it shouldn't be used if fib6_nh_init has failed
so it doesn't need a dummy stub when IPv6 is not enabled.

Fixes: 7bf4796dd099 ("nexthops: add support for replace")
Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h    | 1 +
 include/net/ipv6_stubs.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index c412dde4d67d..83b8070d1cc9 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -485,6 +485,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
 		 struct fib6_config *cfg, gfp_t gfp_flags,
 		 struct netlink_ext_ack *extack);
 void fib6_nh_release(struct fib6_nh *fib6_nh);
+void fib6_nh_release_dsts(struct fib6_nh *fib6_nh);
 
 int call_fib6_entry_notifiers(struct net *net,
 			      enum fib_event_type event_type,
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index afbce90c4480..45e0339be6fa 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -47,6 +47,7 @@ struct ipv6_stub {
 			    struct fib6_config *cfg, gfp_t gfp_flags,
 			    struct netlink_ext_ack *extack);
 	void (*fib6_nh_release)(struct fib6_nh *fib6_nh);
+	void (*fib6_nh_release_dsts)(struct fib6_nh *fib6_nh);
 	void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt);
 	int (*ip6_del_rt)(struct net *net, struct fib6_info *rt, bool skip_notify);
 	void (*fib6_rt_update)(struct net *net, struct fib6_info *rt,
-- 
cgit v1.2.3


From 3e9fdc6b73ca862e72ea8a563638cecdc11d26e2 Mon Sep 17 00:00:00 2001
From: Yassine Oudjana <y.oudjana@protonmail.com>
Date: Thu, 21 Oct 2021 13:24:54 +0000
Subject: dt-bindings: interconnect: Add Qualcomm MSM8996 DT bindings

Add bindings for interconnects on Qualcomm MSM8996.

Signed-off-by: Yassine Oudjana <y.oudjana@protonmail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Tested-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> #db820c
Link: https://lore.kernel.org/r/20211021132329.234942-4-y.oudjana@protonmail.com
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 include/dt-bindings/interconnect/qcom,msm8996.h | 163 ++++++++++++++++++++++++
 1 file changed, 163 insertions(+)
 create mode 100644 include/dt-bindings/interconnect/qcom,msm8996.h

(limited to 'include')

diff --git a/include/dt-bindings/interconnect/qcom,msm8996.h b/include/dt-bindings/interconnect/qcom,msm8996.h
new file mode 100644
index 000000000000..a0b7c0ec7bed
--- /dev/null
+++ b/include/dt-bindings/interconnect/qcom,msm8996.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+/*
+ * Qualcomm MSM8996 interconnect IDs
+ *
+ * Copyright (c) 2021 Yassine Oudjana <y.oudjana@protonmail.com>
+ */
+
+#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_MSM8996_H
+#define __DT_BINDINGS_INTERCONNECT_QCOM_MSM8996_H
+
+/* A0NOC */
+#define MASTER_PCIE_0			0
+#define MASTER_PCIE_1			1
+#define MASTER_PCIE_2			2
+
+/* A1NOC */
+#define MASTER_CNOC_A1NOC		0
+#define MASTER_CRYPTO_CORE0		1
+#define MASTER_PNOC_A1NOC		2
+
+/* A2NOC */
+#define MASTER_USB3			0
+#define MASTER_IPA			1
+#define MASTER_UFS			2
+
+/* BIMC */
+#define MASTER_AMPSS_M0			0
+#define MASTER_GRAPHICS_3D		1
+#define MASTER_MNOC_BIMC		2
+#define MASTER_SNOC_BIMC		3
+#define SLAVE_EBI_CH0			4
+#define SLAVE_HMSS_L3			5
+#define SLAVE_BIMC_SNOC_0		6
+#define SLAVE_BIMC_SNOC_1		7
+
+/* CNOC */
+#define MASTER_SNOC_CNOC		0
+#define MASTER_QDSS_DAP			1
+#define SLAVE_CNOC_A1NOC		2
+#define SLAVE_CLK_CTL			3
+#define SLAVE_TCSR			4
+#define SLAVE_TLMM			5
+#define SLAVE_CRYPTO_0_CFG		6
+#define SLAVE_MPM			7
+#define SLAVE_PIMEM_CFG			8
+#define SLAVE_IMEM_CFG			9
+#define SLAVE_MESSAGE_RAM		10
+#define SLAVE_BIMC_CFG			11
+#define SLAVE_PMIC_ARB			12
+#define SLAVE_PRNG			13
+#define SLAVE_DCC_CFG			14
+#define SLAVE_RBCPR_MX			15
+#define SLAVE_QDSS_CFG			16
+#define SLAVE_RBCPR_CX			17
+#define SLAVE_QDSS_RBCPR_APU		18
+#define SLAVE_CNOC_MNOC_CFG		19
+#define SLAVE_SNOC_CFG			20
+#define SLAVE_SNOC_MPU_CFG		21
+#define SLAVE_EBI1_PHY_CFG		22
+#define SLAVE_A0NOC_CFG			23
+#define SLAVE_PCIE_1_CFG		24
+#define SLAVE_PCIE_2_CFG		25
+#define SLAVE_PCIE_0_CFG		26
+#define SLAVE_PCIE20_AHB2PHY		27
+#define SLAVE_A0NOC_MPU_CFG		28
+#define SLAVE_UFS_CFG			29
+#define SLAVE_A1NOC_CFG			30
+#define SLAVE_A1NOC_MPU_CFG		31
+#define SLAVE_A2NOC_CFG			32
+#define SLAVE_A2NOC_MPU_CFG		33
+#define SLAVE_SSC_CFG			34
+#define SLAVE_A0NOC_SMMU_CFG		35
+#define SLAVE_A1NOC_SMMU_CFG		36
+#define SLAVE_A2NOC_SMMU_CFG		37
+#define SLAVE_LPASS_SMMU_CFG		38
+#define SLAVE_CNOC_MNOC_MMSS_CFG	39
+
+/* MNOC */
+#define MASTER_CNOC_MNOC_CFG		0
+#define MASTER_CPP			1
+#define MASTER_JPEG			2
+#define MASTER_MDP_PORT0		3
+#define MASTER_MDP_PORT1		4
+#define MASTER_ROTATOR			5
+#define MASTER_VIDEO_P0			6
+#define MASTER_VFE			7
+#define MASTER_SNOC_VMEM		8
+#define MASTER_VIDEO_P0_OCMEM		9
+#define MASTER_CNOC_MNOC_MMSS_CFG	10
+#define SLAVE_MNOC_BIMC			11
+#define SLAVE_VMEM			12
+#define SLAVE_SERVICE_MNOC		13
+#define SLAVE_MMAGIC_CFG		14
+#define SLAVE_CPR_CFG			15
+#define SLAVE_MISC_CFG			16
+#define SLAVE_VENUS_THROTTLE_CFG	17
+#define SLAVE_VENUS_CFG			18
+#define SLAVE_VMEM_CFG			19
+#define SLAVE_DSA_CFG			20
+#define SLAVE_MMSS_CLK_CFG		21
+#define SLAVE_DSA_MPU_CFG		22
+#define SLAVE_MNOC_MPU_CFG		23
+#define SLAVE_DISPLAY_CFG		24
+#define SLAVE_DISPLAY_THROTTLE_CFG	25
+#define SLAVE_CAMERA_CFG		26
+#define SLAVE_CAMERA_THROTTLE_CFG	27
+#define SLAVE_GRAPHICS_3D_CFG		28
+#define SLAVE_SMMU_MDP_CFG		29
+#define SLAVE_SMMU_ROT_CFG		30
+#define SLAVE_SMMU_VENUS_CFG		31
+#define SLAVE_SMMU_CPP_CFG		32
+#define SLAVE_SMMU_JPEG_CFG		33
+#define SLAVE_SMMU_VFE_CFG		34
+
+/* PNOC */
+#define MASTER_SNOC_PNOC		0
+#define MASTER_SDCC_1			1
+#define MASTER_SDCC_2			2
+#define MASTER_SDCC_4			3
+#define MASTER_USB_HS			4
+#define MASTER_BLSP_1			5
+#define MASTER_BLSP_2			6
+#define MASTER_TSIF			7
+#define SLAVE_PNOC_A1NOC		8
+#define SLAVE_USB_HS			9
+#define SLAVE_SDCC_2			10
+#define SLAVE_SDCC_4			11
+#define SLAVE_TSIF			12
+#define SLAVE_BLSP_2			13
+#define SLAVE_SDCC_1			14
+#define SLAVE_BLSP_1			15
+#define SLAVE_PDM			16
+#define SLAVE_AHB2PHY			17
+
+/* SNOC */
+#define MASTER_HMSS			0
+#define MASTER_QDSS_BAM			1
+#define MASTER_SNOC_CFG			2
+#define MASTER_BIMC_SNOC_0		3
+#define MASTER_BIMC_SNOC_1		4
+#define MASTER_A0NOC_SNOC		5
+#define MASTER_A1NOC_SNOC		6
+#define MASTER_A2NOC_SNOC		7
+#define MASTER_QDSS_ETR			8
+#define SLAVE_A0NOC_SNOC		9
+#define SLAVE_A1NOC_SNOC		10
+#define SLAVE_A2NOC_SNOC		11
+#define SLAVE_HMSS			12
+#define SLAVE_LPASS			13
+#define SLAVE_USB3			14
+#define SLAVE_SNOC_BIMC			15
+#define SLAVE_SNOC_CNOC			16
+#define SLAVE_IMEM			17
+#define SLAVE_PIMEM			18
+#define SLAVE_SNOC_VMEM			19
+#define SLAVE_SNOC_PNOC			20
+#define SLAVE_QDSS_STM			21
+#define SLAVE_PCIE_0			22
+#define SLAVE_PCIE_1			23
+#define SLAVE_PCIE_2			24
+#define SLAVE_SERVICE_SNOC		25
+
+#endif
-- 
cgit v1.2.3


From 325e0d0aa683a96b9d9cd5802be524d4da5e2dd2 Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Mon, 18 Oct 2021 18:16:01 -0500
Subject: devlink: Add 'enable_iwarp' generic device param

Add a new device generic parameter to enable and disable
iWARP functionality on a multi-protocol RDMA device.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Tested-by: Leszek Kaliszczuk <leszek.kaliszczuk@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/net/devlink.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index aab3d007c577..e3c88fabd700 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -485,6 +485,7 @@ enum devlink_param_generic_id {
 	DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
 	DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
 	DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
+	DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP,
 
 	/* add new param generic ids above here*/
 	__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -534,6 +535,9 @@ enum devlink_param_generic_id {
 #define DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME "enable_vnet"
 #define DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE DEVLINK_PARAM_TYPE_BOOL
 
+#define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME "enable_iwarp"
+#define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE DEVLINK_PARAM_TYPE_BOOL
+
 #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)	\
 {									\
 	.id = DEVLINK_PARAM_GENERIC_ID_##_id,				\
-- 
cgit v1.2.3


From e523af4ee56090fbdd9cf474752448d35930bcd4 Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Mon, 18 Oct 2021 18:16:02 -0500
Subject: net/ice: Add support for enable_iwarp and enable_roce devlink param

Allow support for 'enable_iwarp' and 'enable_roce' devlink params to turn
on/off iWARP or RoCE protocol support for E800 devices.

For example, a user can turn on iWARP functionality with,

devlink dev param set pci/0000:07:00.0 name enable_iwarp value true cmode runtime

This add an iWARP auxiliary rdma device, ice.iwarp.<>, under this PF.

A user request to enable both iWARP and RoCE under the same PF is rejected
since this device does not support both protocols simultaneously on the
same port.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Tested-by: Leszek Kaliszczuk <leszek.kaliszczuk@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/net/intel/iidc.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h
index e32f6712aee0..1289593411d3 100644
--- a/include/linux/net/intel/iidc.h
+++ b/include/linux/net/intel/iidc.h
@@ -26,6 +26,11 @@ enum iidc_reset_type {
 	IIDC_GLOBR,
 };
 
+enum iidc_rdma_protocol {
+	IIDC_RDMA_PROTOCOL_IWARP = BIT(0),
+	IIDC_RDMA_PROTOCOL_ROCEV2 = BIT(1),
+};
+
 #define IIDC_MAX_USER_PRIORITY		8
 
 /* Struct to hold per RDMA Qset info */
@@ -70,8 +75,6 @@ int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type);
 int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable);
 void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos);
 
-#define IIDC_RDMA_ROCE_NAME	"roce"
-
 /* Structure representing auxiliary driver tailored information about the core
  * PCI dev, each auxiliary driver using the IIDC interface will have an
  * instance of this struct dedicated to it.
-- 
cgit v1.2.3


From 6326948f940dc3f77066d5cdc44ba6afe67830c0 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Wed, 29 Sep 2021 11:01:21 -0400
Subject: lsm: security_task_getsecid_subj() ->
 security_current_getsecid_subj()

The security_task_getsecid_subj() LSM hook invites misuse by allowing
callers to specify a task even though the hook is only safe when the
current task is referenced.  Fix this by removing the task_struct
argument to the hook, requiring LSM implementations to use the
current task.  While we are changing the hook declaration we also
rename the function to security_current_getsecid_subj() in an effort
to reinforce that the hook captures the subjective credentials of the
current task and not an arbitrary task on the system.

Reviewed-by: Serge Hallyn <serge@hallyn.com>
Reviewed-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/lsm_hook_defs.h | 3 +--
 include/linux/lsm_hooks.h     | 8 +++-----
 include/linux/security.h      | 4 ++--
 3 files changed, 6 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index df8de62f4710..ae2228f0711d 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -206,8 +206,7 @@ LSM_HOOK(int, 0, task_fix_setgid, struct cred *new, const struct cred * old,
 LSM_HOOK(int, 0, task_setpgid, struct task_struct *p, pid_t pgid)
 LSM_HOOK(int, 0, task_getpgid, struct task_struct *p)
 LSM_HOOK(int, 0, task_getsid, struct task_struct *p)
-LSM_HOOK(void, LSM_RET_VOID, task_getsecid_subj,
-	 struct task_struct *p, u32 *secid)
+LSM_HOOK(void, LSM_RET_VOID, current_getsecid_subj, u32 *secid)
 LSM_HOOK(void, LSM_RET_VOID, task_getsecid_obj,
 	 struct task_struct *p, u32 *secid)
 LSM_HOOK(int, 0, task_setnice, struct task_struct *p, int nice)
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index d45b6f6e27fd..52c1990644b9 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -719,11 +719,9 @@
  *	@p.
  *	@p contains the task_struct for the process.
  *	Return 0 if permission is granted.
- * @task_getsecid_subj:
- *	Retrieve the subjective security identifier of the task_struct in @p
- *	and return it in @secid.  Special care must be taken to ensure that @p
- *	is the either the "current" task, or the caller has exclusive access
- *	to @p.
+ * @current_getsecid_subj:
+ *	Retrieve the subjective security identifier of the current task and
+ *	return it in @secid.
  *	In case of failure, @secid will be set to zero.
  * @task_getsecid_obj:
  *	Retrieve the objective security identifier of the task_struct in @p
diff --git a/include/linux/security.h b/include/linux/security.h
index bbf44a466832..bb301963e333 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -418,7 +418,7 @@ int security_task_fix_setgid(struct cred *new, const struct cred *old,
 int security_task_setpgid(struct task_struct *p, pid_t pgid);
 int security_task_getpgid(struct task_struct *p);
 int security_task_getsid(struct task_struct *p);
-void security_task_getsecid_subj(struct task_struct *p, u32 *secid);
+void security_current_getsecid_subj(u32 *secid);
 void security_task_getsecid_obj(struct task_struct *p, u32 *secid);
 int security_task_setnice(struct task_struct *p, int nice);
 int security_task_setioprio(struct task_struct *p, int ioprio);
@@ -1119,7 +1119,7 @@ static inline int security_task_getsid(struct task_struct *p)
 	return 0;
 }
 
-static inline void security_task_getsecid_subj(struct task_struct *p, u32 *secid)
+static inline void security_current_getsecid_subj(u32 *secid)
 {
 	*secid = 0;
 }
-- 
cgit v1.2.3


From ea8a163e02d6925773129e2dd86e419e491b791d Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Tue, 16 Nov 2021 11:08:17 +0100
Subject: dt-bindings: phy: Add constants for lan966x serdes

Lan966x has: 2 integrated PHYs, 3 SerDes and 2 RGMII interfaces. Which
requires to be muxed based on the HW representation.

So add constants for each interface to be able to distinguish them.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Link: https://lore.kernel.org/r/20211116100818.1615762-3-horatiu.vultur@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/dt-bindings/phy/phy-lan966x-serdes.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 include/dt-bindings/phy/phy-lan966x-serdes.h

(limited to 'include')

diff --git a/include/dt-bindings/phy/phy-lan966x-serdes.h b/include/dt-bindings/phy/phy-lan966x-serdes.h
new file mode 100644
index 000000000000..4330269a901e
--- /dev/null
+++ b/include/dt-bindings/phy/phy-lan966x-serdes.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+
+#ifndef __PHY_LAN966X_SERDES_H__
+#define __PHY_LAN966X_SERDES_H__
+
+#define CU(x)		(x)
+#define CU_MAX		CU(2)
+#define SERDES6G(x)	(CU_MAX + 1 + (x))
+#define SERDES6G_MAX	SERDES6G(3)
+#define RGMII(x)	(SERDES6G_MAX + 1 + (x))
+#define RGMII_MAX	RGMII(2)
+#define SERDES_MAX	(RGMII_MAX + 1)
+
+#endif
-- 
cgit v1.2.3


From ec3bb890817e4398f2d46e12e2e205495b116be9 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 22 Nov 2021 11:33:13 +0100
Subject: xfrm: fix dflt policy check when there is no policy configured

When there is no policy configured on the system, the default policy is
checked in xfrm_route_forward. However, it was done with the wrong
direction (XFRM_POLICY_FWD instead of XFRM_POLICY_OUT).
The default policy for XFRM_POLICY_FWD was checked just before, with a call
to xfrm[46]_policy_check().

CC: stable@vger.kernel.org
Fixes: 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 2308210793a0..55e574511af5 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1162,7 +1162,7 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 {
 	struct net *net = dev_net(skb->dev);
 
-	if (xfrm_default_allow(net, XFRM_POLICY_FWD))
+	if (xfrm_default_allow(net, XFRM_POLICY_OUT))
 		return !net->xfrm.policy_count[XFRM_POLICY_OUT] ||
 			(skb_dst(skb)->flags & DST_NOXFRM) ||
 			__xfrm_route_forward(skb, family);
-- 
cgit v1.2.3


From 3c542cfa8266e3364938d055b3d548b7bed7f08e Mon Sep 17 00:00:00 2001
From: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Date: Mon, 22 Nov 2021 23:14:20 +0200
Subject: drm/i915/dg2: Tile 4 plane format support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

TileF(Tile4 in bspec) format is 4K tile organized into
64B subtiles with same basic shape as for legacy TileY
which will be supported by Display13.

v2: - Fixed wrong case condition(Jani Nikula)
    - Increased I915_FORMAT_MOD_F_TILED up to 12(Imre Deak)

v3: - s/I915_TILING_F/TILING_4/g
    - s/I915_FORMAT_MOD_F_TILED/I915_FORMAT_MOD_4_TILED/g
    - Removed unneeded fencing code

v4: - Rebased, fixed merge conflict with new table-oriented
      format modifier checking(Stan)
    - Replaced the rest of "Tile F" mentions to "Tile 4"(Stan)

v5: - Still had to remove some Tile F mentionings
    - Moved has_4tile from adlp to DG2(Ramalingam C)
    - Check specifically for DG2, but not the Display13(Imre)

v6: - Moved Tile4 associating struct for modifier/display to
      the beginning(Imre Deak)
    - Removed unneeded case I915_FORMAT_MOD_4_TILED modifier
      checks(Imre Deak)
    - Fixed I915_FORMAT_MOD_4_TILED to be 9 instead of 12
      (Imre Deak)

v7: - Fixed display_ver to { 13, 13 }(Imre Deak)
    - Removed redundant newline(Imre Deak)

Reviewed-by: Imre Deak <imre.deak@intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Juha-Pekka Heikkilä <juha-pekka.heikkila@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211122211420.31584-1-stanislav.lisovskiy@intel.com
---
 include/uapi/drm/drm_fourcc.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 7f652c96845b..41184a94935d 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -564,6 +564,14 @@ extern "C" {
  * pitch is required to be a multiple of 4 tile widths.
  */
 #define I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC fourcc_mod_code(INTEL, 8)
+/*
+ * Intel F-tiling(aka Tile4) layout
+ *
+ * This is a tiled layout using 4Kb tiles in row-major layout.
+ * Within the tile pixels are laid out in 64 byte units / sub-tiles in OWORD
+ * (16 bytes) chunks column-major..
+ */
+#define I915_FORMAT_MOD_4_TILED         fourcc_mod_code(INTEL, 9)
 
 /*
  * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks
-- 
cgit v1.2.3


From 91389c390521a02ecfb91270f5b9d7fae4312ae5 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel@sholland.org>
Date: Thu, 18 Nov 2021 21:33:37 -0600
Subject: clk: sunxi-ng: Allow the CCU core to be built as a module

Like the individual CCU drivers, it can be beneficial for memory
consumption of cross-platform configurations to only load the CCU core
on the relevant platform. For example, a generic arm64 kernel sees the
following improvement when building the CCU core and drivers as modules:

  before:
    text      data     bss     dec       hex      filename
    13882360  5251670  360800  19494830  12977ae  vmlinux

  after:
    text      data     bss     dec       hex      filename
    13734787  5086442  360800  19182029  124b1cd  vmlinux

So the result is a 390KB total reduction in kernel image size.

The one early clock provider (sun5i) requires the core to be built in.

Now that loading the MMC driver will trigger loading the CCU core, the
MMC timing mode functions do not need a compile-time fallback.

Signed-off-by: Samuel Holland <samuel@sholland.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://lore.kernel.org/r/20211119033338.25486-5-samuel@sholland.org
---
 include/linux/clk/sunxi-ng.h | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/clk/sunxi-ng.h b/include/linux/clk/sunxi-ng.h
index 3cd14acde0a1..cf32123b39f5 100644
--- a/include/linux/clk/sunxi-ng.h
+++ b/include/linux/clk/sunxi-ng.h
@@ -6,22 +6,7 @@
 #ifndef _LINUX_CLK_SUNXI_NG_H_
 #define _LINUX_CLK_SUNXI_NG_H_
 
-#include <linux/errno.h>
-
-#ifdef CONFIG_SUNXI_CCU
 int sunxi_ccu_set_mmc_timing_mode(struct clk *clk, bool new_mode);
 int sunxi_ccu_get_mmc_timing_mode(struct clk *clk);
-#else
-static inline int sunxi_ccu_set_mmc_timing_mode(struct clk *clk,
-						bool new_mode)
-{
-	return -ENOTSUPP;
-}
-
-static inline int sunxi_ccu_get_mmc_timing_mode(struct clk *clk)
-{
-	return -ENOTSUPP;
-}
-#endif
 
 #endif
-- 
cgit v1.2.3


From c962f10f3931e8409f67dc52725df13e23c67d2d Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel@sholland.org>
Date: Thu, 18 Nov 2021 22:35:39 -0600
Subject: dt-bindings: clk: Add compatibles for D1 CCUs

The D1 has a CCU and a R_CCU (PRCM CCU) like most other sunxi SoCs, with
3 and 4 clock inputs, respectively. Add the compatibles and bindings.

Signed-off-by: Samuel Holland <samuel@sholland.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://lore.kernel.org/r/20211119043545.4010-2-samuel@sholland.org
---
 include/dt-bindings/clock/sun20i-d1-ccu.h   | 156 ++++++++++++++++++++++++++++
 include/dt-bindings/clock/sun20i-d1-r-ccu.h |  19 ++++
 include/dt-bindings/reset/sun20i-d1-ccu.h   |  77 ++++++++++++++
 include/dt-bindings/reset/sun20i-d1-r-ccu.h |  16 +++
 4 files changed, 268 insertions(+)
 create mode 100644 include/dt-bindings/clock/sun20i-d1-ccu.h
 create mode 100644 include/dt-bindings/clock/sun20i-d1-r-ccu.h
 create mode 100644 include/dt-bindings/reset/sun20i-d1-ccu.h
 create mode 100644 include/dt-bindings/reset/sun20i-d1-r-ccu.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/sun20i-d1-ccu.h b/include/dt-bindings/clock/sun20i-d1-ccu.h
new file mode 100644
index 000000000000..e3ac53315e1a
--- /dev/null
+++ b/include/dt-bindings/clock/sun20i-d1-ccu.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: (GPL-2.0+ or MIT) */
+/*
+ * Copyright (C) 2020 huangzhenwei@allwinnertech.com
+ * Copyright (C) 2021 Samuel Holland <samuel@sholland.org>
+ */
+
+#ifndef _DT_BINDINGS_CLK_SUN20I_D1_CCU_H_
+#define _DT_BINDINGS_CLK_SUN20I_D1_CCU_H_
+
+#define CLK_PLL_CPUX		0
+#define CLK_PLL_DDR0		1
+#define CLK_PLL_PERIPH0_4X	2
+#define CLK_PLL_PERIPH0_2X	3
+#define CLK_PLL_PERIPH0_800M	4
+#define CLK_PLL_PERIPH0		5
+#define CLK_PLL_PERIPH0_DIV3	6
+#define CLK_PLL_VIDEO0_4X	7
+#define CLK_PLL_VIDEO0_2X	8
+#define CLK_PLL_VIDEO0		9
+#define CLK_PLL_VIDEO1_4X	10
+#define CLK_PLL_VIDEO1_2X	11
+#define CLK_PLL_VIDEO1		12
+#define CLK_PLL_VE		13
+#define CLK_PLL_AUDIO0_4X	14
+#define CLK_PLL_AUDIO0_2X	15
+#define CLK_PLL_AUDIO0		16
+#define CLK_PLL_AUDIO1		17
+#define CLK_PLL_AUDIO1_DIV2	18
+#define CLK_PLL_AUDIO1_DIV5	19
+#define CLK_CPUX		20
+#define CLK_CPUX_AXI		21
+#define CLK_CPUX_APB		22
+#define CLK_PSI_AHB		23
+#define CLK_APB0		24
+#define CLK_APB1		25
+#define CLK_MBUS		26
+#define CLK_DE			27
+#define CLK_BUS_DE		28
+#define CLK_DI			29
+#define CLK_BUS_DI		30
+#define CLK_G2D			31
+#define CLK_BUS_G2D		32
+#define CLK_CE			33
+#define CLK_BUS_CE		34
+#define CLK_VE			35
+#define CLK_BUS_VE		36
+#define CLK_BUS_DMA		37
+#define CLK_BUS_MSGBOX0		38
+#define CLK_BUS_MSGBOX1		39
+#define CLK_BUS_MSGBOX2		40
+#define CLK_BUS_SPINLOCK	41
+#define CLK_BUS_HSTIMER		42
+#define CLK_AVS			43
+#define CLK_BUS_DBG		44
+#define CLK_BUS_PWM		45
+#define CLK_BUS_IOMMU		46
+#define CLK_DRAM		47
+#define CLK_MBUS_DMA		48
+#define CLK_MBUS_VE		49
+#define CLK_MBUS_CE		50
+#define CLK_MBUS_TVIN		51
+#define CLK_MBUS_CSI		52
+#define CLK_MBUS_G2D		53
+#define CLK_MBUS_RISCV		54
+#define CLK_BUS_DRAM		55
+#define CLK_MMC0		56
+#define CLK_MMC1		57
+#define CLK_MMC2		58
+#define CLK_BUS_MMC0		59
+#define CLK_BUS_MMC1		60
+#define CLK_BUS_MMC2		61
+#define CLK_BUS_UART0		62
+#define CLK_BUS_UART1		63
+#define CLK_BUS_UART2		64
+#define CLK_BUS_UART3		65
+#define CLK_BUS_UART4		66
+#define CLK_BUS_UART5		67
+#define CLK_BUS_I2C0		68
+#define CLK_BUS_I2C1		69
+#define CLK_BUS_I2C2		70
+#define CLK_BUS_I2C3		71
+#define CLK_SPI0		72
+#define CLK_SPI1		73
+#define CLK_BUS_SPI0		74
+#define CLK_BUS_SPI1		75
+#define CLK_EMAC_25M		76
+#define CLK_BUS_EMAC		77
+#define CLK_IR_TX		78
+#define CLK_BUS_IR_TX		79
+#define CLK_BUS_GPADC		80
+#define CLK_BUS_THS		81
+#define CLK_I2S0		82
+#define CLK_I2S1		83
+#define CLK_I2S2		84
+#define CLK_I2S2_ASRC		85
+#define CLK_BUS_I2S0		86
+#define CLK_BUS_I2S1		87
+#define CLK_BUS_I2S2		88
+#define CLK_SPDIF_TX		89
+#define CLK_SPDIF_RX		90
+#define CLK_BUS_SPDIF		91
+#define CLK_DMIC		92
+#define CLK_BUS_DMIC		93
+#define CLK_AUDIO_DAC		94
+#define CLK_AUDIO_ADC		95
+#define CLK_BUS_AUDIO		96
+#define CLK_USB_OHCI0		97
+#define CLK_USB_OHCI1		98
+#define CLK_BUS_OHCI0		99
+#define CLK_BUS_OHCI1		100
+#define CLK_BUS_EHCI0		101
+#define CLK_BUS_EHCI1		102
+#define CLK_BUS_OTG		103
+#define CLK_BUS_LRADC		104
+#define CLK_BUS_DPSS_TOP	105
+#define CLK_HDMI_24M		106
+#define CLK_HDMI_CEC_32K	107
+#define CLK_HDMI_CEC		108
+#define CLK_BUS_HDMI		109
+#define CLK_MIPI_DSI		110
+#define CLK_BUS_MIPI_DSI	111
+#define CLK_TCON_LCD0		112
+#define CLK_BUS_TCON_LCD0	113
+#define CLK_TCON_TV		114
+#define CLK_BUS_TCON_TV		115
+#define CLK_TVE			116
+#define CLK_BUS_TVE_TOP		117
+#define CLK_BUS_TVE		118
+#define CLK_TVD			119
+#define CLK_BUS_TVD_TOP		120
+#define CLK_BUS_TVD		121
+#define CLK_LEDC		122
+#define CLK_BUS_LEDC		123
+#define CLK_CSI_TOP		124
+#define CLK_CSI_MCLK		125
+#define CLK_BUS_CSI		126
+#define CLK_TPADC		127
+#define CLK_BUS_TPADC		128
+#define CLK_BUS_TZMA		129
+#define CLK_DSP			130
+#define CLK_BUS_DSP_CFG		131
+#define CLK_RISCV		132
+#define CLK_RISCV_AXI		133
+#define CLK_BUS_RISCV_CFG	134
+#define CLK_FANOUT_24M		135
+#define CLK_FANOUT_12M		136
+#define CLK_FANOUT_16M		137
+#define CLK_FANOUT_25M		138
+#define CLK_FANOUT_32K		139
+#define CLK_FANOUT_27M		140
+#define CLK_FANOUT_PCLK		141
+#define CLK_FANOUT0		142
+#define CLK_FANOUT1		143
+#define CLK_FANOUT2		144
+
+#endif /* _DT_BINDINGS_CLK_SUN20I_D1_CCU_H_ */
diff --git a/include/dt-bindings/clock/sun20i-d1-r-ccu.h b/include/dt-bindings/clock/sun20i-d1-r-ccu.h
new file mode 100644
index 000000000000..4c2697fd32b0
--- /dev/null
+++ b/include/dt-bindings/clock/sun20i-d1-r-ccu.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: (GPL-2.0+ or MIT) */
+/*
+ * Copyright (C) 2021 Samuel Holland <samuel@sholland.org>
+ */
+
+#ifndef _DT_BINDINGS_CLK_SUN20I_D1_R_CCU_H_
+#define _DT_BINDINGS_CLK_SUN20I_D1_R_CCU_H_
+
+#define CLK_R_AHB		0
+
+#define CLK_BUS_R_TIMER		2
+#define CLK_BUS_R_TWD		3
+#define CLK_BUS_R_PPU		4
+#define CLK_R_IR_RX		5
+#define CLK_BUS_R_IR_RX		6
+#define CLK_BUS_R_RTC		7
+#define CLK_BUS_R_CPUCFG	8
+
+#endif /* _DT_BINDINGS_CLK_SUN20I_D1_R_CCU_H_ */
diff --git a/include/dt-bindings/reset/sun20i-d1-ccu.h b/include/dt-bindings/reset/sun20i-d1-ccu.h
new file mode 100644
index 000000000000..de9ff5203239
--- /dev/null
+++ b/include/dt-bindings/reset/sun20i-d1-ccu.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: (GPL-2.0+ or MIT) */
+/*
+ * Copyright (c) 2020 huangzhenwei@allwinnertech.com
+ * Copyright (C) 2021 Samuel Holland <samuel@sholland.org>
+ */
+
+#ifndef _DT_BINDINGS_RST_SUN20I_D1_CCU_H_
+#define _DT_BINDINGS_RST_SUN20I_D1_CCU_H_
+
+#define RST_MBUS		0
+#define RST_BUS_DE		1
+#define RST_BUS_DI		2
+#define RST_BUS_G2D		3
+#define RST_BUS_CE		4
+#define RST_BUS_VE		5
+#define RST_BUS_DMA		6
+#define RST_BUS_MSGBOX0		7
+#define RST_BUS_MSGBOX1		8
+#define RST_BUS_MSGBOX2		9
+#define RST_BUS_SPINLOCK	10
+#define RST_BUS_HSTIMER		11
+#define RST_BUS_DBG		12
+#define RST_BUS_PWM		13
+#define RST_BUS_DRAM		14
+#define RST_BUS_MMC0		15
+#define RST_BUS_MMC1		16
+#define RST_BUS_MMC2		17
+#define RST_BUS_UART0		18
+#define RST_BUS_UART1		19
+#define RST_BUS_UART2		20
+#define RST_BUS_UART3		21
+#define RST_BUS_UART4		22
+#define RST_BUS_UART5		23
+#define RST_BUS_I2C0		24
+#define RST_BUS_I2C1		25
+#define RST_BUS_I2C2		26
+#define RST_BUS_I2C3		27
+#define RST_BUS_SPI0		28
+#define RST_BUS_SPI1		29
+#define RST_BUS_EMAC		30
+#define RST_BUS_IR_TX		31
+#define RST_BUS_GPADC		32
+#define RST_BUS_THS		33
+#define RST_BUS_I2S0		34
+#define RST_BUS_I2S1		35
+#define RST_BUS_I2S2		36
+#define RST_BUS_SPDIF		37
+#define RST_BUS_DMIC		38
+#define RST_BUS_AUDIO		39
+#define RST_USB_PHY0		40
+#define RST_USB_PHY1		41
+#define RST_BUS_OHCI0		42
+#define RST_BUS_OHCI1		43
+#define RST_BUS_EHCI0		44
+#define RST_BUS_EHCI1		45
+#define RST_BUS_OTG		46
+#define RST_BUS_LRADC		47
+#define RST_BUS_DPSS_TOP	48
+#define RST_BUS_HDMI_SUB	49
+#define RST_BUS_HDMI_MAIN	50
+#define RST_BUS_MIPI_DSI	51
+#define RST_BUS_TCON_LCD0	52
+#define RST_BUS_TCON_TV		53
+#define RST_BUS_LVDS0		54
+#define RST_BUS_TVE		55
+#define RST_BUS_TVE_TOP		56
+#define RST_BUS_TVD		57
+#define RST_BUS_TVD_TOP		58
+#define RST_BUS_LEDC		59
+#define RST_BUS_CSI		60
+#define RST_BUS_TPADC		61
+#define RST_DSP			62
+#define RST_BUS_DSP_CFG		63
+#define RST_BUS_DSP_DBG		64
+#define RST_BUS_RISCV_CFG	65
+
+#endif /* _DT_BINDINGS_RST_SUN20I_D1_CCU_H_ */
diff --git a/include/dt-bindings/reset/sun20i-d1-r-ccu.h b/include/dt-bindings/reset/sun20i-d1-r-ccu.h
new file mode 100644
index 000000000000..d93d6423d283
--- /dev/null
+++ b/include/dt-bindings/reset/sun20i-d1-r-ccu.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: (GPL-2.0+ or MIT) */
+/*
+ * Copyright (C) 2021 Samuel Holland <samuel@sholland.org>
+ */
+
+#ifndef _DT_BINDINGS_RST_SUN20I_D1_R_CCU_H_
+#define _DT_BINDINGS_RST_SUN20I_D1_R_CCU_H_
+
+#define RST_BUS_R_TIMER		0
+#define RST_BUS_R_TWD		1
+#define RST_BUS_R_PPU		2
+#define RST_BUS_R_IR_RX		3
+#define RST_BUS_R_RTC		4
+#define RST_BUS_R_CPUCFG	5
+
+#endif /* _DT_BINDINGS_RST_SUN20I_D1_R_CCU_H_ */
-- 
cgit v1.2.3


From c214f124161d446b340597e7c968e0a2dc149142 Mon Sep 17 00:00:00 2001
From: Lukasz Luba <lukasz.luba@arm.com>
Date: Tue, 9 Nov 2021 19:57:10 +0000
Subject: arch_topology: Introduce thermal pressure update function

The thermal pressure is a mechanism which is used for providing
information about reduced CPU performance to the scheduler. Usually code
has to convert the value from frequency units into capacity units,
which are understandable by the scheduler. Create a common conversion code
which can be just used via a handy API.

Internally, the topology_update_thermal_pressure() operates on frequency
in MHz and max CPU frequency is taken from 'freq_factor' (per-cpu).

Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
Reviewed-by: Thara Gopinath <thara.gopinath@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 include/linux/arch_topology.h  | 3 +++
 include/linux/sched/topology.h | 7 +++++++
 2 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index b97cea83b25e..ace1e5dcf773 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -59,6 +59,9 @@ static inline unsigned long topology_get_thermal_pressure(int cpu)
 void topology_set_thermal_pressure(const struct cpumask *cpus,
 				   unsigned long th_pressure);
 
+void topology_update_thermal_pressure(const struct cpumask *cpus,
+				      unsigned long capped_freq);
+
 struct cpu_topology {
 	int thread_id;
 	int core_id;
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index c07bfa2d80f2..6e89a8e43aa7 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -273,6 +273,13 @@ void arch_set_thermal_pressure(const struct cpumask *cpus,
 { }
 #endif
 
+#ifndef arch_update_thermal_pressure
+static __always_inline
+void arch_update_thermal_pressure(const struct cpumask *cpus,
+				  unsigned long capped_frequency)
+{ }
+#endif
+
 static inline int task_node(const struct task_struct *p)
 {
 	return cpu_to_node(task_cpu(p));
-- 
cgit v1.2.3


From 7e97b3dc2556743dd02612c92a8de7026e8d7dc9 Mon Sep 17 00:00:00 2001
From: Lukasz Luba <lukasz.luba@arm.com>
Date: Tue, 9 Nov 2021 19:57:14 +0000
Subject: arch_topology: Remove unused topology_set_thermal_pressure() and
 related

There is no need of this function (and related) since code has been
converted to use the new arch_update_thermal_pressure() API. The old
code can be removed.

Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 include/linux/arch_topology.h  | 3 ---
 include/linux/sched/topology.h | 7 -------
 2 files changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index ace1e5dcf773..cce6136b300a 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -56,9 +56,6 @@ static inline unsigned long topology_get_thermal_pressure(int cpu)
 	return per_cpu(thermal_pressure, cpu);
 }
 
-void topology_set_thermal_pressure(const struct cpumask *cpus,
-				   unsigned long th_pressure);
-
 void topology_update_thermal_pressure(const struct cpumask *cpus,
 				      unsigned long capped_freq);
 
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 6e89a8e43aa7..8054641c0a7b 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -266,13 +266,6 @@ unsigned long arch_scale_thermal_pressure(int cpu)
 }
 #endif
 
-#ifndef arch_set_thermal_pressure
-static __always_inline
-void arch_set_thermal_pressure(const struct cpumask *cpus,
-			       unsigned long th_pressure)
-{ }
-#endif
-
 #ifndef arch_update_thermal_pressure
 static __always_inline
 void arch_update_thermal_pressure(const struct cpumask *cpus,
-- 
cgit v1.2.3


From 71b597ef5d46a326fb0d5cbfc1c6ff1d73cdc7f9 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel@sholland.org>
Date: Wed, 17 Nov 2021 21:18:36 -0600
Subject: dt-bindings: clock: sunxi: Export CLK_DRAM for devfreq

The MBUS node needs to reference the CLK_DRAM clock, as the MBUS
hardware implements memory dynamic frequency scaling using this clock.

Export this clock for SoCs which will be getting a devfreq driver.

Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Samuel Holland <samuel@sholland.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://lore.kernel.org/r/20211118031841.42315-2-samuel@sholland.org
---
 include/dt-bindings/clock/sun50i-a64-ccu.h | 2 +-
 include/dt-bindings/clock/sun8i-h3-ccu.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/sun50i-a64-ccu.h b/include/dt-bindings/clock/sun50i-a64-ccu.h
index 318eb15c414c..175892189e9d 100644
--- a/include/dt-bindings/clock/sun50i-a64-ccu.h
+++ b/include/dt-bindings/clock/sun50i-a64-ccu.h
@@ -113,7 +113,7 @@
 #define CLK_USB_OHCI0		91
 
 #define CLK_USB_OHCI1		93
-
+#define CLK_DRAM		94
 #define CLK_DRAM_VE		95
 #define CLK_DRAM_CSI		96
 #define CLK_DRAM_DEINTERLACE	97
diff --git a/include/dt-bindings/clock/sun8i-h3-ccu.h b/include/dt-bindings/clock/sun8i-h3-ccu.h
index 30d2d15373a2..5d4ada2c22e6 100644
--- a/include/dt-bindings/clock/sun8i-h3-ccu.h
+++ b/include/dt-bindings/clock/sun8i-h3-ccu.h
@@ -126,7 +126,7 @@
 #define CLK_USB_OHCI1		93
 #define CLK_USB_OHCI2		94
 #define CLK_USB_OHCI3		95
-
+#define CLK_DRAM		96
 #define CLK_DRAM_VE		97
 #define CLK_DRAM_CSI		98
 #define CLK_DRAM_DEINTERLACE	99
-- 
cgit v1.2.3


From a0c2ccd9b5ad0a9e838158404e041b5a8ff762dd Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@codeconstruct.com.au>
Date: Tue, 23 Nov 2021 15:50:46 +0800
Subject: mctp: Add MCTP-over-serial transport binding

This change adds a MCTP Serial transport binding, as defined by DMTF
specificiation DSP0253 - "MCTP Serial Transport Binding". This is
implemented as a new serial line discipline, and can be attached to
arbitrary tty devices.

From the Kconfig description:

  This driver provides an MCTP-over-serial interface, through a
  serial line-discipline, as defined by DMTF specification "DSP0253 -
  MCTP Serial Transport Binding". By attaching the ldisc to a serial
  device, we get a new net device to transport MCTP packets.

  This allows communication with external MCTP endpoints which use
  serial as their transport. It can also be used as an easy way to
  provide MCTP connectivity between virtual machines, by forwarding
  data between simple virtual serial devices.

  Say y here if you need to connect to MCTP endpoints over serial. To
  compile as a module, use m; the module will be called mctp-serial.

Once the N_MCTP line discipline is set [using ioctl(TCIOSETD)], we get a
new netdev suitable for MCTP communication.

The 'mctp' utility[1] provides a simple wrapper for this ioctl, using
'link serial <device>':

  # mctp link serial /dev/ttyS0 &
  # mctp link
  dev lo index 1 address 0x00:00:00:00:00:00 net 1 mtu 65536 up
  dev mctpserial0 index 5 address 0x(no-addr) net 1 mtu 68 down

[1]: https://github.com/CodeConstruct/mctp

Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tty.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/tty.h b/include/uapi/linux/tty.h
index 376cccf397be..a58deb3061eb 100644
--- a/include/uapi/linux/tty.h
+++ b/include/uapi/linux/tty.h
@@ -38,5 +38,6 @@
 #define N_NCI		25	/* NFC NCI UART */
 #define N_SPEAKUP	26	/* Speakup communication with synths */
 #define N_NULL		27	/* Null ldisc used for error handling */
+#define N_MCTP		28	/* MCTP-over-serial */
 
 #endif /* _UAPI_LINUX_TTY_H */
-- 
cgit v1.2.3


From 1e84dc6b7bbfc4d1dd846decece4611b7e035772 Mon Sep 17 00:00:00 2001
From: Yajun Deng <yajun.deng@linux.dev>
Date: Tue, 23 Nov 2021 10:54:30 +0800
Subject: neigh: introduce neigh_confirm() helper function

Add neigh_confirm() for the confirmed member in struct neighbour,
it can be called as an independent unit by other functions.

Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/arp.h       |  8 +-------
 include/net/ndisc.h     | 16 ++--------------
 include/net/neighbour.h | 11 +++++++++++
 include/net/sock.h      |  5 +----
 4 files changed, 15 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/net/arp.h b/include/net/arp.h
index 4950191f6b2b..031374ac2f22 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -53,13 +53,7 @@ static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key)
 
 	rcu_read_lock_bh();
 	n = __ipv4_neigh_lookup_noref(dev, key);
-	if (n) {
-		unsigned long now = jiffies;
-
-		/* avoid dirtying neighbour */
-		if (READ_ONCE(n->confirmed) != now)
-			WRITE_ONCE(n->confirmed, now);
-	}
+	neigh_confirm(n);
 	rcu_read_unlock_bh();
 }
 
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 04341d86585d..53cb8de0e589 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -411,13 +411,7 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev,
 
 	rcu_read_lock_bh();
 	n = __ipv6_neigh_lookup_noref(dev, pkey);
-	if (n) {
-		unsigned long now = jiffies;
-
-		/* avoid dirtying neighbour */
-		if (READ_ONCE(n->confirmed) != now)
-			WRITE_ONCE(n->confirmed, now);
-	}
+	neigh_confirm(n);
 	rcu_read_unlock_bh();
 }
 
@@ -428,13 +422,7 @@ static inline void __ipv6_confirm_neigh_stub(struct net_device *dev,
 
 	rcu_read_lock_bh();
 	n = __ipv6_neigh_lookup_noref_stub(dev, pkey);
-	if (n) {
-		unsigned long now = jiffies;
-
-		/* avoid dirtying neighbour */
-		if (READ_ONCE(n->confirmed) != now)
-			WRITE_ONCE(n->confirmed, now);
-	}
+	neigh_confirm(n);
 	rcu_read_unlock_bh();
 }
 
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 38a0c1d24570..55af812c3ed5 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -321,6 +321,17 @@ static inline struct neighbour *__neigh_lookup_noref(struct neigh_table *tbl,
 	return ___neigh_lookup_noref(tbl, tbl->key_eq, tbl->hash, pkey, dev);
 }
 
+static inline void neigh_confirm(struct neighbour *n)
+{
+	if (n) {
+		unsigned long now = jiffies;
+
+		/* avoid dirtying neighbour */
+		if (READ_ONCE(n->confirmed) != now)
+			WRITE_ONCE(n->confirmed, now);
+	}
+}
+
 void neigh_table_init(int index, struct neigh_table *tbl);
 int neigh_table_clear(int index, struct neigh_table *tbl);
 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
diff --git a/include/net/sock.h b/include/net/sock.h
index a79fc772324e..e7c231373875 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2135,13 +2135,10 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
 {
 	if (skb_get_dst_pending_confirm(skb)) {
 		struct sock *sk = skb->sk;
-		unsigned long now = jiffies;
 
-		/* avoid dirtying neighbour */
-		if (READ_ONCE(n->confirmed) != now)
-			WRITE_ONCE(n->confirmed, now);
 		if (sk && READ_ONCE(sk->sk_dst_pending_confirm))
 			WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
+		neigh_confirm(n);
 	}
 }
 
-- 
cgit v1.2.3


From cff6f593251cdf5398dc3c57f7032b8e9dcb633e Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Tue, 23 Nov 2021 12:36:47 +0200
Subject: regulator: rohm-generic: iniline stub function

The function rohm_regulator_set_voltage_sel_restricted() has a stub
implementation. Linux-next testing spot following:

include/linux/mfd/rohm-generic.h:93:12: error:
'rohm_regulator_set_voltage_sel_restricted' defined but not used

Fix this by inlining the stub.

Fixes: 8b6e88555971 ("regulator: rohm-regulator: add helper for restricted voltage setting")
Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/YZzEP3S7U15bTDAI@fedora
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/mfd/rohm-generic.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h
index 35c5866f48b7..080d60adcd5f 100644
--- a/include/linux/mfd/rohm-generic.h
+++ b/include/linux/mfd/rohm-generic.h
@@ -90,7 +90,8 @@ static inline int rohm_regulator_set_dvs_levels(const struct rohm_dvs_config *dv
 {
 	return 0;
 }
-static int rohm_regulator_set_voltage_sel_restricted(struct regulator_dev *rdev,
+
+static inline int rohm_regulator_set_voltage_sel_restricted(struct regulator_dev *rdev,
 						     unsigned int sel)
 {
 	return 0;
-- 
cgit v1.2.3


From 2106efda785b55a8957efed9a52dfa28ee0d7280 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 22 Nov 2021 17:24:47 -0800
Subject: net: remove .ndo_change_proto_down

.ndo_change_proto_down was added seemingly to enable out-of-tree
implementations. Over 2.5yrs later we still have no real users
upstream. Hardwire the generic implementation for now, we can
revert once real users materialize. (rocker is a test vehicle,
not a user.)

We need to drop the optimization on the sysfs side, because
unlike ndos priv_flags will be changed at runtime, so we'd
need READ_ONCE/WRITE_ONCE everywhere..

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index df049864661d..db3bff1ae7fd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1297,11 +1297,6 @@ struct netdev_net_notifier {
  *	TX queue.
  * int (*ndo_get_iflink)(const struct net_device *dev);
  *	Called to get the iflink value of this device.
- * void (*ndo_change_proto_down)(struct net_device *dev,
- *				 bool proto_down);
- *	This function is used to pass protocol port error state information
- *	to the switch driver. The switch driver can react to the proto_down
- *      by doing a phys down on the associated switch port.
  * int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb);
  *	This function is used to get egress tunnel information for given skb.
  *	This is useful for retrieving outer tunnel header parameters while
@@ -1542,8 +1537,6 @@ struct net_device_ops {
 						      int queue_index,
 						      u32 maxrate);
 	int			(*ndo_get_iflink)(const struct net_device *dev);
-	int			(*ndo_change_proto_down)(struct net_device *dev,
-							 bool proto_down);
 	int			(*ndo_fill_metadata_dst)(struct net_device *dev,
 						       struct sk_buff *skb);
 	void			(*ndo_set_rx_headroom)(struct net_device *dev,
@@ -1612,6 +1605,7 @@ struct net_device_ops {
  * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running
  * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with
  *	skb_headlen(skb) == 0 (data starts from frag0)
+ * @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN
  */
 enum netdev_priv_flags {
 	IFF_802_1Q_VLAN			= 1<<0,
@@ -1646,6 +1640,7 @@ enum netdev_priv_flags {
 	IFF_L3MDEV_RX_HANDLER		= 1<<29,
 	IFF_LIVE_RENAME_OK		= 1<<30,
 	IFF_TX_SKB_NO_LINEAR		= 1<<31,
+	IFF_CHANGE_PROTO_DOWN		= BIT_ULL(32),
 };
 
 #define IFF_802_1Q_VLAN			IFF_802_1Q_VLAN
@@ -1982,7 +1977,7 @@ struct net_device {
 
 	/* Read-mostly cache-line for fast-path access */
 	unsigned int		flags;
-	unsigned int		priv_flags;
+	unsigned long long	priv_flags;
 	const struct net_device_ops *netdev_ops;
 	int			ifindex;
 	unsigned short		gflags;
@@ -3735,7 +3730,6 @@ int dev_get_port_parent_id(struct net_device *dev,
 			   struct netdev_phys_item_id *ppid, bool recurse);
 bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b);
 int dev_change_proto_down(struct net_device *dev, bool proto_down);
-int dev_change_proto_down_generic(struct net_device *dev, bool proto_down);
 void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
 				  u32 value);
 struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again);
-- 
cgit v1.2.3


From 985e9ece1e55a94da842f6c1f9ff84d587b26267 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Wed, 17 Nov 2021 20:07:35 +0200
Subject: ACPI: Make acpi_node_get_parent() local

acpi_node_get_parent() isn't used outside drivers/acpi/property.c.

Make it local.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 668d007f0917..b28f8790192a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1182,7 +1182,6 @@ int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname,
 
 struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
 					    struct fwnode_handle *child);
-struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode);
 
 struct acpi_probe_entry;
 typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *,
@@ -1287,12 +1286,6 @@ acpi_get_next_subnode(const struct fwnode_handle *fwnode,
 	return NULL;
 }
 
-static inline struct fwnode_handle *
-acpi_node_get_parent(const struct fwnode_handle *fwnode)
-{
-	return NULL;
-}
-
 static inline struct fwnode_handle *
 acpi_graph_get_next_endpoint(const struct fwnode_handle *fwnode,
 			     struct fwnode_handle *prev)
-- 
cgit v1.2.3


From 37a72b08a3e1eb28053214dd8211eb09c2fd3187 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Fri, 22 Oct 2021 08:47:56 +0200
Subject: xen: add "not_essential" flag to struct xenbus_driver

When booting the xenbus driver will wait for PV devices to have
connected to their backends before continuing. The timeout is different
between essential and non-essential devices.

Non-essential devices are identified by their nodenames directly in the
xenbus driver, which requires to update this list in case a new device
type being non-essential is added (this was missed for several types
in the past).

In order to avoid this problem, add a "not_essential" flag to struct
xenbus_driver which can be set to "true" by the respective frontend.

Set this flag for the frontends currently regarded to be not essential
(vkbs and vfb) and use it for testing in the xenbus driver.

Signed-off-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20211022064800.14978-2-jgross@suse.com
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 include/xen/xenbus.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index b94074c82772..b13eb86395e0 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -112,6 +112,7 @@ struct xenbus_driver {
 	const char *name;       /* defaults to ids[0].devicetype */
 	const struct xenbus_device_id *ids;
 	bool allow_rebind; /* avoid setting xenstore closed during remove */
+	bool not_essential;     /* is not mandatory for boot progress */
 	int (*probe)(struct xenbus_device *dev,
 		     const struct xenbus_device_id *id);
 	void (*otherend_changed)(struct xenbus_device *dev,
-- 
cgit v1.2.3


From c6d5f1933085f9a92ed5c256a859ab31c7a35f88 Mon Sep 17 00:00:00 2001
From: Kurt Kanzenbach <kurt@linutronix.de>
Date: Mon, 22 Nov 2021 12:19:31 +0100
Subject: net: stmmac: Calculate CDC error only once

The clock domain crossing error (CDC) is calculated at every fetch of Tx or Rx
timestamps. It includes a division. Especially on arm32 based systems it is
expensive. It also requires two conditionals in the hotpath.

Add a compensation value cache to struct plat_stmmacenet_data and subtract it
unconditionally in the RX/TX functions which spares the conditionals.

The value is initialized to 0 and if supported calculated in the PTP
initialization code.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
Link: https://lore.kernel.org/r/20211122111931.135135-1-kurt@linutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/stmmac.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index a6f03b36fc4f..89b8e208cd7b 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -241,6 +241,7 @@ struct plat_stmmacenet_data {
 	unsigned int clk_ref_rate;
 	unsigned int mult_fact_100ns;
 	s32 ptp_max_adj;
+	u32 cdc_error_adj;
 	struct reset_control *stmmac_rst;
 	struct reset_control *stmmac_ahb_rst;
 	struct stmmac_axi *axi;
-- 
cgit v1.2.3


From e7049395b1c3085d12b5ba16d058c65598368853 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Mon, 22 Nov 2021 19:16:21 +0900
Subject: dccp/tcp: Remove an unused argument in inet_csk_listen_start().

The commit 1295e2cf3065 ("inet: minor optimization for backlog setting in
listen(2)") added change so that sk_max_ack_backlog is initialised earlier
in inet_dccp_listen() and inet_listen().  Since then, we no longer use
backlog in inet_csk_listen_start(), so let's remove it.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Acked-by: Yafang Shao <laoar.shao@gmail.com>
Reviewed-by: Richard Sailer <richard_siegfried@systemli.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/inet_connection_sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index fa6a87246a7b..4ad47d9f9d27 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -304,7 +304,7 @@ static inline __poll_t inet_csk_listen_poll(const struct sock *sk)
 			(EPOLLIN | EPOLLRDNORM) : 0;
 }
 
-int inet_csk_listen_start(struct sock *sk, int backlog);
+int inet_csk_listen_start(struct sock *sk);
 void inet_csk_listen_stop(struct sock *sk);
 
 void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr);
-- 
cgit v1.2.3


From 86c82c8aeebf6db5df8ab73cec8333853c405070 Mon Sep 17 00:00:00 2001
From: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Date: Wed, 24 Nov 2021 11:23:55 +0200
Subject: Revert "drm/i915/dg2: Tile 4 plane format support"

Tile4 patch still needs an ack from userspace,
IGT tests and some essential fixes, related to
new .plane_caps attribute being added.

This reverts commit 3c542cfa8266e3364938d055b3d548b7bed7f08e.

Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Acked-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211124092355.16668-1-stanislav.lisovskiy@intel.com
---
 include/uapi/drm/drm_fourcc.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 41184a94935d..7f652c96845b 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -564,14 +564,6 @@ extern "C" {
  * pitch is required to be a multiple of 4 tile widths.
  */
 #define I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC fourcc_mod_code(INTEL, 8)
-/*
- * Intel F-tiling(aka Tile4) layout
- *
- * This is a tiled layout using 4Kb tiles in row-major layout.
- * Within the tile pixels are laid out in 64 byte units / sub-tiles in OWORD
- * (16 bytes) chunks column-major..
- */
-#define I915_FORMAT_MOD_4_TILED         fourcc_mod_code(INTEL, 9)
 
 /*
  * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks
-- 
cgit v1.2.3


From 393c3714081a53795bbff0e985d24146def6f57f Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 18 Nov 2021 15:00:08 -0800
Subject: kernfs: switch global kernfs_rwsem lock to per-fs lock

The kernfs implementation has big lock granularity(kernfs_rwsem) so
every kernfs-based(e.g., sysfs, cgroup) fs are able to compete the
lock. It makes trouble for some cases to wait the global lock
for a long time even though they are totally independent contexts
each other.

A general example is process A goes under direct reclaim with holding
the lock when it accessed the file in sysfs and process B is waiting
the lock with exclusive mode and then process C is waiting the lock
until process B could finish the job after it gets the lock from
process A.

This patch switches the global kernfs_rwsem to per-fs lock, which
put the rwsem into kernfs_root.

Suggested-by: Tejun Heo <tj@kernel.org>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Minchan Kim <minchan@kernel.org>
Link: https://lore.kernel.org/r/20211118230008.2679780-1-minchan@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kernfs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 3ccce6f24548..9f650986a81b 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -16,6 +16,7 @@
 #include <linux/atomic.h>
 #include <linux/uidgid.h>
 #include <linux/wait.h>
+#include <linux/rwsem.h>
 
 struct file;
 struct dentry;
@@ -197,6 +198,7 @@ struct kernfs_root {
 	struct list_head	supers;
 
 	wait_queue_head_t	deactivate_waitq;
+	struct rw_semaphore	kernfs_rwsem;
 };
 
 struct kernfs_open_file {
-- 
cgit v1.2.3


From b456abe63f60ad93c83a526d33b71574bc32656c Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 19 Nov 2021 17:08:50 -0600
Subject: ALSA: pcm: introduce INFO_NO_REWINDS flag

When the hardware can only deal with a monotonically increasing
appl_ptr, this flag can be set.

In case the application requests a rewind, be it with a
snd_pcm_rewind() or with a direct change of a mmap'ed pointer followed
by a SNDRV_PCM_IOCTL_SYNC_PTR, this patch checks if a rewind
occurred and returns an error.

Credits to Takashi Iwai for identifying the path with SYNC_PTR and
suggesting the pointer checks.

Suggested-by: Takashi Iwai <tiwai@suse.de>
Reviewed-by: Takashi Iwai <tiwai@suse.de>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20211119230852.206310-3-pierre-louis.bossart@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/uapi/sound/asound.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index 5fbb79e30819..ff7e638221c5 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -300,7 +300,7 @@ typedef int __bitwise snd_pcm_subformat_t;
 #define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME    0x04000000  /* report estimated link audio time */
 #define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000  /* report synchronized audio/system time */
 #define SNDRV_PCM_INFO_EXPLICIT_SYNC	0x10000000	/* needs explicit sync of pointers and data */
-
+#define SNDRV_PCM_INFO_NO_REWINDS	0x20000000	/* hardware can only support monotonic changes of appl_ptr */
 #define SNDRV_PCM_INFO_DRAIN_TRIGGER	0x40000000		/* internal kernel flag - trigger in drain */
 #define SNDRV_PCM_INFO_FIFO_IN_FRAMES	0x80000000	/* internal kernel flag - FIFO size is in frames */
 
-- 
cgit v1.2.3


From 1b6ed6bf32fb22ef8e3572fc9c0f6454adf1ca40 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Wed, 24 Nov 2021 09:17:37 +0200
Subject: regulator: Drop unnecessary struct member

The irq_flags from the regulator IRQ helper description struct was never
used. The IRQ flags are passed as parameters to helper registration
instead.

Remove the unnecessary struct field.

Fixes: 7111c6d1b31b ("regulator: IRQ based event/error notification helpers")
Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/5f6371e178453fa2b165da50452f7db4e986debb.1637736436.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 1cb8071fee34..53b25cd7ead0 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -554,7 +554,6 @@ struct regulator_irq_data {
  */
 struct regulator_irq_desc {
 	const char *name;
-	int irq_flags;
 	int fatal_cnt;
 	int reread_ms;
 	int irq_off_ms;
-- 
cgit v1.2.3


From 6fadec4c5561e2fbe1dfa8a7da9bc58d094a8f04 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Wed, 24 Nov 2021 09:16:45 +0200
Subject: regulator: Add regulator_err2notif() helper

Help drivers avoid storing both supported notification and supported error
flags by supporting conversion from regulator error to notification.
This may help saving some bytes.

Add helper for finding the regulator notification corresponding to a
regulator error.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/eb1755ac0569ff07ffa466cf8912c6fd50e7c7c6.1637736436.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'include')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 53b25cd7ead0..6c6ec9658c30 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -645,6 +645,40 @@ struct regulator_dev {
 	spinlock_t err_lock;
 };
 
+/*
+ * Convert error flags to corresponding notifications.
+ *
+ * Can be used by drivers which use the notification helpers to
+ * find out correct notification flags based on the error flags. Drivers
+ * can avoid storing both supported notification and error flags which
+ * may save few bytes.
+ */
+static inline int regulator_err2notif(int err)
+{
+	switch (err) {
+	case REGULATOR_ERROR_UNDER_VOLTAGE:
+		return REGULATOR_EVENT_UNDER_VOLTAGE;
+	case REGULATOR_ERROR_OVER_CURRENT:
+		return REGULATOR_EVENT_OVER_CURRENT;
+	case REGULATOR_ERROR_REGULATION_OUT:
+		return REGULATOR_EVENT_REGULATION_OUT;
+	case REGULATOR_ERROR_FAIL:
+		return REGULATOR_EVENT_FAIL;
+	case REGULATOR_ERROR_OVER_TEMP:
+		return REGULATOR_EVENT_OVER_TEMP;
+	case REGULATOR_ERROR_UNDER_VOLTAGE_WARN:
+		return REGULATOR_EVENT_UNDER_VOLTAGE_WARN;
+	case REGULATOR_ERROR_OVER_CURRENT_WARN:
+		return REGULATOR_EVENT_OVER_CURRENT_WARN;
+	case REGULATOR_ERROR_OVER_VOLTAGE_WARN:
+		return REGULATOR_EVENT_OVER_VOLTAGE_WARN;
+	case REGULATOR_ERROR_OVER_TEMP_WARN:
+		return REGULATOR_EVENT_OVER_TEMP_WARN;
+	}
+	return 0;
+}
+
+
 struct regulator_dev *
 regulator_register(const struct regulator_desc *regulator_desc,
 		   const struct regulator_config *config);
-- 
cgit v1.2.3


From a764ff77d697a4a13e69b3379cc613f7409c6b9a Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Wed, 24 Nov 2021 09:17:13 +0200
Subject: regulator: irq_helper: Provide helper for trivial IRQ notifications

Provide a generic map_event helper for regulators which have a notification
IRQ with single, well defined purpose. Eg, IRQ always indicates exactly one
event for exactly one regulator device. For such IRQs the mapping is
trivial.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/603b7ed1938013a00371c1e7ccc63dfb16982b87.1637736436.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 6c6ec9658c30..4078c7776453 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -700,6 +700,8 @@ void *regulator_irq_helper(struct device *dev,
 			   int irq_flags, int common_errs, int *per_rdev_errs,
 			   struct regulator_dev **rdev, int rdev_amount);
 void regulator_irq_helper_cancel(void **handle);
+int regulator_irq_map_event_simple(int irq, struct regulator_irq_data *rid,
+				   unsigned long *dev_mask);
 
 void *rdev_get_drvdata(struct regulator_dev *rdev);
 struct device *rdev_get_dev(struct regulator_dev *rdev);
-- 
cgit v1.2.3


From 432dd1fc134ef902b049b26839edfd3fdc1f8dc0 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Wed, 24 Nov 2021 07:57:49 +0200
Subject: regulator: rohm-generic: remove unused dummies

Function rohm_regulator_set_voltage_sel_restricted() and
rohm_regulator_set_dvs_levels() had inlined dummy implementations for
cases when the real implementation was not configured in.

All of the drivers who issue the call to these functions do SELECT the
real implementation from the Kconfig. There should be no cases where the
real implementation was not selected by the drivers using these
functions - such a situation is likely to be an error which deserves to be
noticed at compile-time.

These dummies could in theory be used for compile-testing the drivers
only (without the generic rohm regulator pieces). However, for such
compile testing we should manually drop the selection from KConfig - and
I guess that if it does not work out-of-the-box, then it is not going to
happen. Especially when there should be no reason to omit
compile-testing the generic rohm_regulator part.

Crash test dummies.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/YZ3UXXrk/Efe7Scj@fedora
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/mfd/rohm-generic.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h
index 080d60adcd5f..5ed97a1d0908 100644
--- a/include/linux/mfd/rohm-generic.h
+++ b/include/linux/mfd/rohm-generic.h
@@ -82,20 +82,6 @@ int rohm_regulator_set_dvs_levels(const struct rohm_dvs_config *dvs,
 
 int rohm_regulator_set_voltage_sel_restricted(struct regulator_dev *rdev,
 					      unsigned int sel);
-#else
-static inline int rohm_regulator_set_dvs_levels(const struct rohm_dvs_config *dvs,
-						struct device_node *np,
-						const struct regulator_desc *desc,
-						struct regmap *regmap)
-{
-	return 0;
-}
-
-static inline int rohm_regulator_set_voltage_sel_restricted(struct regulator_dev *rdev,
-						     unsigned int sel)
-{
-	return 0;
-}
 #endif
 
 #endif
-- 
cgit v1.2.3


From 2338e7bcef445059a99848a3eddde0b556277663 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Mon, 15 Nov 2021 15:10:01 +0300
Subject: device property: Remove device_add_properties() API

There are no more users for it.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/property.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/property.h b/include/linux/property.h
index 88fa726a76df..16f736c698a2 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -378,10 +378,6 @@ property_entries_dup(const struct property_entry *properties);
 
 void property_entries_free(const struct property_entry *properties);
 
-int device_add_properties(struct device *dev,
-			  const struct property_entry *properties);
-void device_remove_properties(struct device *dev);
-
 bool device_dma_supported(struct device *dev);
 
 enum dev_dma_attr device_get_dma_attr(struct device *dev);
-- 
cgit v1.2.3


From 6fd13452c1a2e6dfe5b9a4c84c1144383cc55472 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 10 Nov 2021 13:16:40 +0200
Subject: ACPI: processor: Replace kernel.h with the necessary inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpi_numa.h | 1 -
 include/acpi/processor.h | 7 ++++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h
index 68e4d80c1b32..b5f594754a9e 100644
--- a/include/acpi/acpi_numa.h
+++ b/include/acpi/acpi_numa.h
@@ -3,7 +3,6 @@
 #define __ACPI_NUMA_H
 
 #ifdef CONFIG_ACPI_NUMA
-#include <linux/kernel.h>
 #include <linux/numa.h>
 
 /* Proximity bitmap length */
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index 683e124ad517..194027371928 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -2,11 +2,16 @@
 #ifndef __ACPI_PROCESSOR_H
 #define __ACPI_PROCESSOR_H
 
-#include <linux/kernel.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
 #include <linux/pm_qos.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/thermal.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
 #include <asm/acpi.h>
 
 #define ACPI_PROCESSOR_CLASS		"processor"
-- 
cgit v1.2.3


From 04c76b41ca974b508522831441dd7e5b1b59cbb0 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Wed, 10 Nov 2021 15:49:32 +0000
Subject: io_uring: add option to skip CQE posting

Emitting a CQE is expensive from the kernel perspective. Often, it's
also not convenient for the userspace, spends some cycles on processing
and just complicates the logic. A similar problems goes for linked
requests, where we post an CQE for each request in the link.

Introduce a new flags, IOSQE_CQE_SKIP_SUCCESS, trying to help with it.
When set and a request completed successfully, it won't generate a CQE.
When fails, it produces an CQE, but all following linked requests will
be CQE-less, regardless whether they have IOSQE_CQE_SKIP_SUCCESS or not.
The notion of "fail" is the same as for link failing-cancellation, where
it's opcode dependent, and _usually_ result >= 0 is a success, but not
always.

Linked timeouts are a bit special. When the requests it's linked to was
not attempted to be executed, e.g. failing linked requests, it follows
the description above. Otherwise, whether a linked timeout will post a
completion or not solely depends on IOSQE_CQE_SKIP_SUCCESS of that
linked timeout request. Linked timeout never "fail" during execution, so
for them it's unconditional. It's expected for users to not really care
about the result of it but rely solely on the result of the master
request. Another reason for such a treatment is that it's racy, and the
timeout callback may be running awhile the master request posts its
completion.

use case 1:
If one doesn't care about results of some requests, e.g. normal
timeouts, just set IOSQE_CQE_SKIP_SUCCESS. Error result will still be
posted and need to be handled.

use case 2:
Set IOSQE_CQE_SKIP_SUCCESS for all requests of a link but the last,
and it'll post a completion only for the last one if everything goes
right, otherwise there will be one only one CQE for the first failed
request.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/0220fbe06f7cf99e6fc71b4297bb1cb6c0e89c2c.1636559119.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index c45b5e9a9387..787f491f0d2a 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -70,6 +70,7 @@ enum {
 	IOSQE_IO_HARDLINK_BIT,
 	IOSQE_ASYNC_BIT,
 	IOSQE_BUFFER_SELECT_BIT,
+	IOSQE_CQE_SKIP_SUCCESS_BIT,
 };
 
 /*
@@ -87,6 +88,8 @@ enum {
 #define IOSQE_ASYNC		(1U << IOSQE_ASYNC_BIT)
 /* select buffer from sqe->buf_group */
 #define IOSQE_BUFFER_SELECT	(1U << IOSQE_BUFFER_SELECT_BIT)
+/* don't post CQE if request succeeded */
+#define IOSQE_CQE_SKIP_SUCCESS	(1U << IOSQE_CQE_SKIP_SUCCESS_BIT)
 
 /*
  * io_uring_setup() flags
@@ -289,6 +292,7 @@ struct io_uring_params {
 #define IORING_FEAT_EXT_ARG		(1U << 8)
 #define IORING_FEAT_NATIVE_WORKERS	(1U << 9)
 #define IORING_FEAT_RSRC_TAGS		(1U << 10)
+#define IORING_FEAT_CQE_SKIP		(1U << 11)
 
 /*
  * io_uring_register(2) opcodes and arguments
-- 
cgit v1.2.3


From 3f2bedabb62c6210df63b604dc988d2f7f56f947 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 26 Oct 2021 12:03:47 +0200
Subject: futex: Ensure futex_atomic_cmpxchg_inatomic() is present

The boot-time detection of futex_atomic_cmpxchg_inatomic() has a bug on
some 32-bit arm builds, and Thomas Gleixner suggested that setting
CONFIG_HAVE_FUTEX_CMPXCHG would avoid the problem, as it is always present
anyway.

Looking into which other architectures could do the same showed that almost
all architectures have it, the exceptions being:

 - some old 32-bit MIPS uniprocessor cores without ll/sc
 - one xtensa variant with no SMP
 - 32-bit SPARC when built for SMP

Fix MIPS And Xtensa by rearranging the generic code to let it be used
as a fallback.

For SPARC, the SMP definition just ends up turning off futex anyway, so
this can be done at Kconfig time instead. Note that sparc32 glibc requires
the CASA instruction for its mutexes anyway, which is only available when
running on SPARCv9 or LEON CPUs, but needs to be implemented in the sparc32
kernel for those.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Max Filippov <jcmvbkbc@gmail.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Rich Felker <dalias@libc.org>
Link: https://lore.kernel.org/r/20211026100432.1730393-1-arnd@kernel.org
---
 include/asm-generic/futex.h | 31 +++++++++++--------------------
 1 file changed, 11 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index f4c3470480c7..30e7fa63b5df 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -6,15 +6,22 @@
 #include <linux/uaccess.h>
 #include <asm/errno.h>
 
+#ifndef futex_atomic_cmpxchg_inatomic
 #ifndef CONFIG_SMP
 /*
  * The following implementation only for uniprocessor machines.
  * It relies on preempt_disable() ensuring mutual exclusion.
  *
  */
+#define futex_atomic_cmpxchg_inatomic(uval, uaddr, oldval, newval) \
+	futex_atomic_cmpxchg_inatomic_local_generic(uval, uaddr, oldval, newval)
+#define arch_futex_atomic_op_inuser(op, oparg, oval, uaddr) \
+	arch_futex_atomic_op_inuser_local_generic(op, oparg, oval, uaddr)
+#endif /* CONFIG_SMP */
+#endif
 
 /**
- * arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant
+ * arch_futex_atomic_op_inuser_local() - Atomic arithmetic operation with constant
  *			  argument and comparison of the previous
  *			  futex value with another constant.
  *
@@ -28,7 +35,7 @@
  * -ENOSYS - Operation not supported
  */
 static inline int
-arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
+futex_atomic_op_inuser_local(int op, u32 oparg, int *oval, u32 __user *uaddr)
 {
 	int oldval, ret;
 	u32 tmp;
@@ -75,7 +82,7 @@ out_pagefault_enable:
 }
 
 /**
- * futex_atomic_cmpxchg_inatomic() - Compare and exchange the content of the
+ * futex_atomic_cmpxchg_inatomic_local() - Compare and exchange the content of the
  *				uaddr with newval if the current value is
  *				oldval.
  * @uval:	pointer to store content of @uaddr
@@ -87,10 +94,9 @@ out_pagefault_enable:
  * 0 - On success
  * -EFAULT - User access resulted in a page fault
  * -EAGAIN - Atomic operation was unable to complete due to contention
- * -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG)
  */
 static inline int
-futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+futex_atomic_cmpxchg_inatomic_local(u32 *uval, u32 __user *uaddr,
 			      u32 oldval, u32 newval)
 {
 	u32 val;
@@ -112,19 +118,4 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 	return 0;
 }
 
-#else
-static inline int
-arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
-{
-	return -ENOSYS;
-}
-
-static inline int
-futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
-			      u32 oldval, u32 newval)
-{
-	return -ENOSYS;
-}
-
-#endif /* CONFIG_SMP */
 #endif
-- 
cgit v1.2.3


From f124034faa911ed534bf8c4881ad98dbbde2a966 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 24 Nov 2021 18:44:17 -0500
Subject: Revert "virtio_ring: validate used buffer length"

This reverts commit 939779f5152d161b34f612af29e7dc1ac4472fcf.

Attempts to validate length in the core did not work out: there turn out
to exist multiple broken devices, and in particular legacy devices are
known to be broken in this respect.

We have ideas for handling this better in the next version but for now
let's revert to a known good state to make sure drivers work for people.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 44d0e09da2d9..41edbc01ffa4 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -152,7 +152,6 @@ size_t virtio_max_dma_size(struct virtio_device *vdev);
  * @feature_table_size: number of entries in the feature table array.
  * @feature_table_legacy: same as feature_table but when working in legacy mode.
  * @feature_table_size_legacy: number of entries in feature table legacy array.
- * @suppress_used_validation: set to not have core validate used length
  * @probe: the function to call when a device is found.  Returns 0 or -errno.
  * @scan: optional function to call after successful probe; intended
  *    for virtio-scsi to invoke a scan.
@@ -169,7 +168,6 @@ struct virtio_driver {
 	unsigned int feature_table_size;
 	const unsigned int *feature_table_legacy;
 	unsigned int feature_table_size_legacy;
-	bool suppress_used_validation;
 	int (*validate)(struct virtio_device *dev);
 	int (*probe)(struct virtio_device *dev);
 	void (*scan)(struct virtio_device *dev);
-- 
cgit v1.2.3


From 083a7fba38885a8ffa03a2857e383421cefd36e6 Mon Sep 17 00:00:00 2001
From: Oder Chiou <oder_chiou@realtek.com>
Date: Thu, 25 Nov 2021 13:58:11 +0800
Subject: ASoC: rt5640: Add the binding include file for the HDA header support

The patch adds the binding include file for the HDA header support.

Signed-off-by: Oder Chiou <oder_chiou@realtek.com>
Link: https://lore.kernel.org/r/20211125055812.8911-1-oder_chiou@realtek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/dt-bindings/sound/rt5640.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/dt-bindings/sound/rt5640.h b/include/dt-bindings/sound/rt5640.h
index 154c9b4414f2..655f6946388a 100644
--- a/include/dt-bindings/sound/rt5640.h
+++ b/include/dt-bindings/sound/rt5640.h
@@ -16,6 +16,7 @@
 #define RT5640_JD_SRC_GPIO2		4
 #define RT5640_JD_SRC_GPIO3		5
 #define RT5640_JD_SRC_GPIO4		6
+#define RT5640_JD_SRC_HDA_HEADER	7
 
 #define RT5640_OVCD_SF_0P5		0
 #define RT5640_OVCD_SF_0P75		1
-- 
cgit v1.2.3


From 53db28933e952a8536b002ba8b8c9443ccc0e939 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Thu, 25 Nov 2021 14:05:18 +0100
Subject: fuse: extend init flags

FUSE_INIT flags are close to running out, so add another 32bits worth of
space.

Add FUSE_INIT_EXT flag to the old flags field in fuse_init_in.  If this
flag is set, then fuse_init_in is extended by 48bytes, in which a flags_hi
field is allocated to contain the high 32bits of the flags.

A flags_hi field is also added to fuse_init_out, allocated out of the
remaining unused fields.

Known userspace implementations of the fuse protocol have been checked to
accept the extended FUSE_INIT request, but this might cause problems with
other implementations.  If that happens to be the case, the protocol
negotiation will have to be extended with an extra initialization request
roundtrip.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/uapi/linux/fuse.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index a1dc3ee1d17c..980f3998c11b 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -187,6 +187,10 @@
  *
  *  7.35
  *  - add FOPEN_NOFLUSH
+ *
+ *  7.36
+ *  - extend fuse_init_in with reserved fields, add FUSE_INIT_EXT init flag
+ *  - add flags2 to fuse_init_in and fuse_init_out
  */
 
 #ifndef _LINUX_FUSE_H
@@ -222,7 +226,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 35
+#define FUSE_KERNEL_MINOR_VERSION 36
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -341,6 +345,8 @@ struct fuse_file_lock {
  *			write/truncate sgid is killed only if file has group
  *			execute permission. (Same as Linux VFS behavior).
  * FUSE_SETXATTR_EXT:	Server supports extended struct fuse_setxattr_in
+ * FUSE_INIT_EXT: extended fuse_init_in request
+ * FUSE_INIT_RESERVED: reserved, do not use
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -372,6 +378,9 @@ struct fuse_file_lock {
 #define FUSE_SUBMOUNTS		(1 << 27)
 #define FUSE_HANDLE_KILLPRIV_V2	(1 << 28)
 #define FUSE_SETXATTR_EXT	(1 << 29)
+#define FUSE_INIT_EXT		(1 << 30)
+#define FUSE_INIT_RESERVED	(1 << 31)
+/* bits 32..63 get shifted down 32 bits into the flags2 field */
 
 /**
  * CUSE INIT request/reply flags
@@ -741,6 +750,8 @@ struct fuse_init_in {
 	uint32_t	minor;
 	uint32_t	max_readahead;
 	uint32_t	flags;
+	uint32_t	flags2;
+	uint32_t	unused[11];
 };
 
 #define FUSE_COMPAT_INIT_OUT_SIZE 8
@@ -757,7 +768,8 @@ struct fuse_init_out {
 	uint32_t	time_gran;
 	uint16_t	max_pages;
 	uint16_t	map_alignment;
-	uint32_t	unused[8];
+	uint32_t	flags2;
+	uint32_t	unused[7];
 };
 
 #define CUSE_INIT_INFO_MAX 4096
-- 
cgit v1.2.3


From 3e2b6fdbdc9ab5a02d9d5676a005f30780b97553 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Thu, 11 Nov 2021 09:32:49 -0500
Subject: fuse: send security context of inode on file

When a new inode is created, send its security context to server along with
creation request (FUSE_CREAT, FUSE_MKNOD, FUSE_MKDIR and FUSE_SYMLINK).
This gives server an opportunity to create new file and set security
context (possibly atomically).  In all the configurations it might not be
possible to set context atomically.

Like nfs and ceph, use security_dentry_init_security() to dermine security
context of inode and send it with create, mkdir, mknod, and symlink
requests.

Following is the information sent to server.

fuse_sectx_header, fuse_secctx, xattr_name, security_context

 - struct fuse_secctx_header
   This contains total number of security contexts being sent and total
   size of all the security contexts (including size of
   fuse_secctx_header).

 - struct fuse_secctx
   This contains size of security context which follows this structure.
   There is one fuse_secctx instance per security context.

 - xattr name string
   This string represents name of xattr which should be used while setting
   security context.

 - security context
   This is the actual security context whose size is specified in
   fuse_secctx struct.

Also add the FUSE_SECURITY_CTX flag for the `flags` field of the
fuse_init_out struct.  When this flag is set the kernel will append the
security context for a newly created inode to the request (create, mkdir,
mknod, and symlink).  The server is responsible for ensuring that the inode
appears atomically (preferrably) with the requested security context.

For example, If the server is using SELinux and backed by a "real" linux
file system that supports extended attributes it can write the security
context value to /proc/thread-self/attr/fscreate before making the syscall
to create the inode.

This patch is based on patch from Chirantan Ekbote <chirantan@chromium.org>

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/uapi/linux/fuse.h | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 980f3998c11b..3f0ea63fec08 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -191,6 +191,8 @@
  *  7.36
  *  - extend fuse_init_in with reserved fields, add FUSE_INIT_EXT init flag
  *  - add flags2 to fuse_init_in and fuse_init_out
+ *  - add FUSE_SECURITY_CTX init flag
+ *  - add security context to create, mkdir, symlink, and mknod requests
  */
 
 #ifndef _LINUX_FUSE_H
@@ -347,6 +349,8 @@ struct fuse_file_lock {
  * FUSE_SETXATTR_EXT:	Server supports extended struct fuse_setxattr_in
  * FUSE_INIT_EXT: extended fuse_init_in request
  * FUSE_INIT_RESERVED: reserved, do not use
+ * FUSE_SECURITY_CTX:	add security context to create, mkdir, symlink, and
+ *			mknod
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -381,6 +385,7 @@ struct fuse_file_lock {
 #define FUSE_INIT_EXT		(1 << 30)
 #define FUSE_INIT_RESERVED	(1 << 31)
 /* bits 32..63 get shifted down 32 bits into the flags2 field */
+#define FUSE_SECURITY_CTX	(1ULL << 32)
 
 /**
  * CUSE INIT request/reply flags
@@ -877,9 +882,12 @@ struct fuse_dirent {
 	char name[];
 };
 
-#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
-#define FUSE_DIRENT_ALIGN(x) \
+/* Align variable length records to 64bit boundary */
+#define FUSE_REC_ALIGN(x) \
 	(((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
+
+#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
+#define FUSE_DIRENT_ALIGN(x) FUSE_REC_ALIGN(x)
 #define FUSE_DIRENT_SIZE(d) \
 	FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
 
@@ -996,4 +1004,26 @@ struct fuse_syncfs_in {
 	uint64_t	padding;
 };
 
+/*
+ * For each security context, send fuse_secctx with size of security context
+ * fuse_secctx will be followed by security context name and this in turn
+ * will be followed by actual context label.
+ * fuse_secctx, name, context
+ */
+struct fuse_secctx {
+	uint32_t	size;
+	uint32_t	padding;
+};
+
+/*
+ * Contains the information about how many fuse_secctx structures are being
+ * sent and what's the total size of all security contexts (including
+ * size of fuse_secctx_header).
+ *
+ */
+struct fuse_secctx_header {
+	uint32_t	size;
+	uint32_t	nr_secctx;
+};
+
 #endif /* _LINUX_FUSE_H */
-- 
cgit v1.2.3


From 635e4172bd0a43af943fb164799965fc9a9a705d Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Tue, 2 Nov 2021 07:38:10 +0100
Subject: arm: remove zte zx platform left-over

Commit 89d4f98ae90d ("ARM: remove zte zx platform") missed to remove some
definitions for this platform's debug and serial, e.g., code dependent on
the config DEBUG_ZTE_ZX.

Fortunately, ./scripts/checkkconfigsymbols.py detects this and warns:

DEBUG_ZTE_ZX
Referencing files: arch/arm/include/debug/pl01x.S

Further review by Arnd Bergmann identified even more dead code in the
amba serial driver.

Remove all this left-over from the zte zx platform.

Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Link: https://lore.kernel.org/r/20211102063810.932-1-lukas.bulwahn@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/amba/bus.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index edfcf7a14dcd..6c7f47846971 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -90,14 +90,8 @@ enum amba_vendor {
 	AMBA_VENDOR_ST = 0x80,
 	AMBA_VENDOR_QCOM = 0x51,
 	AMBA_VENDOR_LSI = 0xb6,
-	AMBA_VENDOR_LINUX = 0xfe,	/* This value is not official */
 };
 
-/* This is used to generate pseudo-ID for AMBA device */
-#define AMBA_LINUX_ID(conf, rev, part) \
-	(((conf) & 0xff) << 24 | ((rev) & 0xf) << 20 | \
-	AMBA_VENDOR_LINUX << 12 | ((part) & 0xfff))
-
 extern struct bus_type amba_bustype;
 
 #define to_amba_device(d)	container_of(d, struct amba_device, dev)
-- 
cgit v1.2.3


From 4167bd25ec3bc221387ec6811c05eadfe3cf1d3e Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Thu, 18 Nov 2021 08:31:24 +0100
Subject: mxser: move ids from pci_ids.h here

There is no point having MOXA PCI device IDs in include/linux/pci_ids.h.
Move them to the driver and sort them all by the ID.

Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: linux-pci@vger.kernel.org
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20211118073125.12283-19-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/pci_ids.h | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 011f2f1ea5bb..c389a9c0f290 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1964,24 +1964,6 @@
 #define PCI_DEVICE_ID_APPLICOM_PCI2000PFB 0x0003
 
 #define PCI_VENDOR_ID_MOXA		0x1393
-#define PCI_DEVICE_ID_MOXA_RC7000	0x0001
-#define PCI_DEVICE_ID_MOXA_CP102	0x1020
-#define PCI_DEVICE_ID_MOXA_CP102UL	0x1021
-#define PCI_DEVICE_ID_MOXA_CP102U	0x1022
-#define PCI_DEVICE_ID_MOXA_C104		0x1040
-#define PCI_DEVICE_ID_MOXA_CP104U	0x1041
-#define PCI_DEVICE_ID_MOXA_CP104JU	0x1042
-#define PCI_DEVICE_ID_MOXA_CP104EL	0x1043
-#define PCI_DEVICE_ID_MOXA_CT114	0x1140
-#define PCI_DEVICE_ID_MOXA_CP114	0x1141
-#define PCI_DEVICE_ID_MOXA_CP118U	0x1180
-#define PCI_DEVICE_ID_MOXA_CP118EL	0x1181
-#define PCI_DEVICE_ID_MOXA_CP132	0x1320
-#define PCI_DEVICE_ID_MOXA_CP132U	0x1321
-#define PCI_DEVICE_ID_MOXA_CP134U	0x1340
-#define PCI_DEVICE_ID_MOXA_C168		0x1680
-#define PCI_DEVICE_ID_MOXA_CP168U	0x1681
-#define PCI_DEVICE_ID_MOXA_CP168EL	0x1682
 #define PCI_DEVICE_ID_MOXA_CP204J	0x2040
 #define PCI_DEVICE_ID_MOXA_C218		0x2180
 #define PCI_DEVICE_ID_MOXA_C320		0x3200
-- 
cgit v1.2.3


From 5db96ef23bda6c2a61a51693c85b78b52d03f654 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Mon, 22 Nov 2021 12:16:48 +0100
Subject: tty: drop tty_schedule_flip()

Since commit a9c3f68f3cd8d (tty: Fix low_latency BUG) in 2014,
tty_flip_buffer_push() is only a wrapper to tty_schedule_flip(). All
users were converted in the previous patches, so remove
tty_schedule_flip() completely while inlining its body into
tty_flip_buffer_push().

One less exported function.

Reviewed-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20211122111648.30379-4-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty_flip.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h
index 9916acb5de49..483d41cbcbb7 100644
--- a/include/linux/tty_flip.h
+++ b/include/linux/tty_flip.h
@@ -17,7 +17,6 @@ int tty_insert_flip_string_fixed_flag(struct tty_port *port,
 int tty_prepare_flip_string(struct tty_port *port, unsigned char **chars,
 		size_t size);
 void tty_flip_buffer_push(struct tty_port *port);
-void tty_schedule_flip(struct tty_port *port);
 int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag);
 
 static inline int tty_insert_flip_char(struct tty_port *port,
-- 
cgit v1.2.3


From d78328bcc4d0e677f2ff83f4ae1f43c933fbd143 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Mon, 22 Nov 2021 10:45:29 +0100
Subject: tty: remove file from tty_ldisc_ops::ioctl and compat_ioctl

After the previous patches, noone needs 'file' parameter in neither
ioctl hook from tty_ldisc_ops. So remove 'file' from both of them.

Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Cc: Luiz Augusto von Dentz <luiz.dentz@gmail.com>
Cc: Wolfgang Grandegger <wg@grandegger.com>
Cc: Marc Kleine-Budde <mkl@pengutronix.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Andreas Koensgen <ajk@comnets.uni-bremen.de>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com> [NFC]
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Acked-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20211122094529.24171-1-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty_ldisc.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h
index b85d84fb5f49..25f07017bbad 100644
--- a/include/linux/tty_ldisc.h
+++ b/include/linux/tty_ldisc.h
@@ -45,8 +45,7 @@ struct tty_struct;
  *	some processing on the characters first.  If this function is
  *	not defined, the user will receive an EIO error.
  *
- * int	(*ioctl)(struct tty_struct * tty, struct file * file,
- *		 unsigned int cmd, unsigned long arg);
+ * int	(*ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg);
  *
  *	This function is called when the user requests an ioctl which
  *	is not handled by the tty layer or the low-level tty driver.
@@ -56,8 +55,8 @@ struct tty_struct;
  *	low-level driver can "grab" an ioctl request before the line
  *	discpline has a chance to see it.
  *
- * int	(*compat_ioctl)(struct tty_struct * tty, struct file * file,
- *		        unsigned int cmd, unsigned long arg);
+ * int	(*compat_ioctl)(struct tty_struct *tty, unsigned int cmd,
+ *			unsigned long arg);
  *
  *	Process ioctl calls from 32-bit process on 64-bit system
  *
@@ -192,10 +191,10 @@ struct tty_ldisc_ops {
 			void **cookie, unsigned long offset);
 	ssize_t	(*write)(struct tty_struct *tty, struct file *file,
 			 const unsigned char *buf, size_t nr);
-	int	(*ioctl)(struct tty_struct *tty, struct file *file,
-			 unsigned int cmd, unsigned long arg);
-	int	(*compat_ioctl)(struct tty_struct *tty, struct file *file,
-				unsigned int cmd, unsigned long arg);
+	int	(*ioctl)(struct tty_struct *tty, unsigned int cmd,
+			unsigned long arg);
+	int	(*compat_ioctl)(struct tty_struct *tty, unsigned int cmd,
+			unsigned long arg);
 	void	(*set_termios)(struct tty_struct *tty, struct ktermios *old);
 	__poll_t (*poll)(struct tty_struct *, struct file *,
 			     struct poll_table_struct *);
-- 
cgit v1.2.3


From e88422bccda86808359f0d4179e25e5c2191ab4f Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 24 Nov 2021 16:16:12 -0800
Subject: Bluetooth: HCI: Fix definition of hci_rp_read_stored_link_key

Both max_num_keys and num_key are 2 octects:

BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
page 1985:

  Max_Num_Keys:
  Size: 2 octets
  Range: 0x0000 to 0xFFFF

  Num_Keys_Read:
  Size: 2 octets
  Range: 0x0000 to 0xFFFF

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 4 ++--
 include/net/bluetooth/hci_core.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 84db6b275231..923534da9c0f 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1047,8 +1047,8 @@ struct hci_cp_read_stored_link_key {
 } __packed;
 struct hci_rp_read_stored_link_key {
 	__u8     status;
-	__u8     max_keys;
-	__u8     num_keys;
+	__le16   max_keys;
+	__le16   num_keys;
 } __packed;
 
 #define HCI_OP_DELETE_STORED_LINK_KEY	0x0c12
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 2560cfe80db8..bb07a6d0d597 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -352,8 +352,8 @@ struct hci_dev {
 	__u16		lmp_subver;
 	__u16		voice_setting;
 	__u8		num_iac;
-	__u8		stored_max_keys;
-	__u8		stored_num_keys;
+	__u16		stored_max_keys;
+	__u16		stored_num_keys;
 	__u8		io_capability;
 	__s8		inq_tx_power;
 	__u8		err_data_reporting;
-- 
cgit v1.2.3


From 7978656caf2ad3963fb63797128ec8d71caa2798 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 24 Nov 2021 16:16:13 -0800
Subject: Bluetooth: HCI: Fix definition of hci_rp_delete_stored_link_key

num_keys is actually 2 octects not 1:

BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
page 1989:

  Num_Keys_Deleted:
  Size: 2 octets
  0xXXXX	Number of Link Keys Deleted

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 923534da9c0f..0d2a9216869b 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1058,7 +1058,7 @@ struct hci_cp_delete_stored_link_key {
 } __packed;
 struct hci_rp_delete_stored_link_key {
 	__u8     status;
-	__u8     num_keys;
+	__le16   num_keys;
 } __packed;
 
 #define HCI_MAX_NAME_LENGTH		248
-- 
cgit v1.2.3


From ea13aed5e5dfbabd166759aaf9f4af3cb804875a Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Thu, 25 Nov 2021 15:04:36 +0800
Subject: Bluetooth: Send device found event on name resolve failure

Introducing NAME_REQUEST_FAILED flag that will be sent together with
device found event on name resolve failure. This will provide the
userspace with an information so it can decide not to resolve the
name for these devices in the future.

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Reviewed-by: Miao-chen Chou <mcchou@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/mgmt.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 23a0524061b7..107b25deae68 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -936,10 +936,11 @@ struct mgmt_ev_auth_failed {
 	__u8	status;
 } __packed;
 
-#define MGMT_DEV_FOUND_CONFIRM_NAME    0x01
-#define MGMT_DEV_FOUND_LEGACY_PAIRING  0x02
-#define MGMT_DEV_FOUND_NOT_CONNECTABLE 0x04
-#define MGMT_DEV_FOUND_INITIATED_CONN  0x08
+#define MGMT_DEV_FOUND_CONFIRM_NAME		0x01
+#define MGMT_DEV_FOUND_LEGACY_PAIRING		0x02
+#define MGMT_DEV_FOUND_NOT_CONNECTABLE		0x04
+#define MGMT_DEV_FOUND_INITIATED_CONN		0x08
+#define MGMT_DEV_FOUND_NAME_REQUEST_FAILED	0x10
 
 #define MGMT_EV_DEVICE_FOUND		0x0012
 struct mgmt_ev_device_found {
-- 
cgit v1.2.3


From dbf6811abbfcc79d3cd5ce1ff53fe1c741167a1f Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Thu, 25 Nov 2021 15:04:37 +0800
Subject: Bluetooth: Limit duration of Remote Name Resolve

When doing remote name request, we cannot scan. In the normal case it's
OK since we can expect it to finish within a short amount of time.
However, there is a possibility to scan lots of devices that
(1) requires Remote Name Resolve
(2) is unresponsive to Remote Name Resolve
When this happens, we are stuck to do Remote Name Resolve until all is
done before continue scanning.

This patch adds a time limit to stop us spending too long on remote
name request.

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Reviewed-by: Miao-chen Chou <mcchou@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index bb07a6d0d597..7bae8376fd6f 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -88,6 +88,7 @@ struct discovery_state {
 	u8			(*uuids)[16];
 	unsigned long		scan_start;
 	unsigned long		scan_duration;
+	unsigned long		name_resolve_timeout;
 };
 
 #define SUSPEND_NOTIFIER_TIMEOUT	msecs_to_jiffies(2000) /* 2 seconds */
@@ -1759,6 +1760,8 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c);
 #define DISCOV_LE_FAST_ADV_INT_MIN	0x00A0	/* 100 msec */
 #define DISCOV_LE_FAST_ADV_INT_MAX	0x00F0	/* 150 msec */
 
+#define NAME_RESOLVE_DURATION		msecs_to_jiffies(10240)	/* 10.24 sec */
+
 void mgmt_fill_version_info(void *ver);
 int mgmt_new_settings(struct hci_dev *hdev);
 void mgmt_index_added(struct hci_dev *hdev);
-- 
cgit v1.2.3


From 8abe19703825eda9c49f54624af57546c23af53f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 25 Nov 2021 14:58:08 +0200
Subject: net: dsa: felix: enable cut-through forwarding between ports by
 default

The VSC9959 switch embedded within NXP LS1028A (and that version of
Ocelot switches only) supports cut-through forwarding - meaning it can
start the process of looking up the destination ports for a packet, and
forward towards those ports, before the entire packet has been received
(as opposed to the store-and-forward mode).

The up side is having lower forwarding latency for large packets. The
down side is that frames with FCS errors are forwarded instead of being
dropped. However, erroneous frames do not result in incorrect updates of
the FDB or incorrect policer updates, since these processes are deferred
inside the switch to the end of frame. Since the switch starts the
cut-through forwarding process after all packet headers (including IP,
if any) have been processed, packets with large headers and small
payload do not see the benefit of lower forwarding latency.

There are two cases that need special attention.

The first is when a packet is multicast (or flooded) to multiple
destinations, one of which doesn't have cut-through forwarding enabled.
The switch deals with this automatically by disabling cut-through
forwarding for the frame towards all destination ports.

The second is when a packet is forwarded from a port of lower link speed
towards a port of higher link speed. This is not handled by the hardware
and needs software intervention.

Since we practically need to update the cut-through forwarding domain
from paths that aren't serialized by the rtnl_mutex (phylink
mac_link_down/mac_link_up ops), this means we need to serialize physical
link events with user space updates of bonding/bridging domains.

Enabling cut-through forwarding is done per {egress port, traffic class}.
I don't see any reason why this would be a configurable option as long
as it works without issues, and there doesn't appear to be any user
space configuration tool to toggle this on/off, so this patch enables
cut-through forwarding on all eligible ports and traffic classes.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20211125125808.2383984-2-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/soc/mscc/ocelot.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 89d17629efe5..33f2e8c9e88b 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -561,6 +561,7 @@ struct ocelot_ops {
 	int (*psfp_filter_del)(struct ocelot *ocelot, struct flow_cls_offload *f);
 	int (*psfp_stats_get)(struct ocelot *ocelot, struct flow_cls_offload *f,
 			      struct flow_stats *stats);
+	void (*cut_through_fwd)(struct ocelot *ocelot);
 };
 
 struct ocelot_vcap_policer {
@@ -655,6 +656,8 @@ struct ocelot_port {
 
 	struct net_device		*bridge;
 	u8				stp_state;
+
+	int				speed;
 };
 
 struct ocelot {
@@ -712,6 +715,8 @@ struct ocelot {
 
 	/* Lock for serializing access to the MAC table */
 	struct mutex			mact_lock;
+	/* Lock for serializing forwarding domain changes */
+	struct mutex			fwd_domain_lock;
 
 	struct workqueue_struct		*owq;
 
@@ -811,7 +816,9 @@ void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs);
 int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled,
 			       struct netlink_ext_ack *extack);
 void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state);
-void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot);
+u32 ocelot_get_dsa_8021q_cpu_mask(struct ocelot *ocelot);
+u32 ocelot_get_bridge_fwd_mask(struct ocelot *ocelot, int src_port);
+void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot, bool joining);
 int ocelot_port_pre_bridge_flags(struct ocelot *ocelot, int port,
 				 struct switchdev_brport_flags val);
 void ocelot_port_bridge_flags(struct ocelot *ocelot, int port,
-- 
cgit v1.2.3


From 703319094c9c2bf34f65d3496ccb350149fdd14b Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 24 Nov 2021 14:26:14 -0500
Subject: sctp: make the raise timer more simple and accurate

Currently, the probe timer is reused as the raise timer when PLPMTUD is in
the Search Complete state. raise_count was introduced to count how many
times the probe timer has timed out. When raise_count reaches to 30, the
raise timer handler will be triggered.

During the whole processing above, the timer keeps timing out every probe_
interval. It is a waste for the Search Complete state, as the raise timer
only needs to time out after 30 * probe_interval.

Since the raise timer and probe timer are never used at the same time, it
is no need to keep probe timer 'alive' in the Search Complete state. This
patch to introduce sctp_transport_reset_raise_timer() to start the timer
as the raise timer when entering the Search Complete state. When entering
the other states, sctp_transport_reset_probe_timer() will still be called
to reset the timer to the probe timer.

raise_count can be removed from sctp_transport as no need to count probe
timer timeout for raise timer timeout. last_rtx_chunks can be removed as
sctp_transport_reset_probe_timer() can be called in the place where asoc
rtx_data_chunks is changed.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Link: https://lore.kernel.org/r/edb0e48988ea85997488478b705b11ddc1ba724a.1637781974.git.lucien.xin@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sctp/structs.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 899c29c326ba..e7d1ed7a3dfb 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -984,12 +984,10 @@ struct sctp_transport {
 	} cacc;
 
 	struct {
-		__u32 last_rtx_chunks;
 		__u16 pmtu;
 		__u16 probe_size;
 		__u16 probe_high;
-		__u8 probe_count:3;
-		__u8 raise_count:5;
+		__u8 probe_count;
 		__u8 state;
 	} pl; /* plpmtud related */
 
@@ -1011,6 +1009,7 @@ void sctp_transport_reset_t3_rtx(struct sctp_transport *);
 void sctp_transport_reset_hb_timer(struct sctp_transport *);
 void sctp_transport_reset_reconf_timer(struct sctp_transport *transport);
 void sctp_transport_reset_probe_timer(struct sctp_transport *transport);
+void sctp_transport_reset_raise_timer(struct sctp_transport *transport);
 int sctp_transport_hold(struct sctp_transport *);
 void sctp_transport_put(struct sctp_transport *);
 void sctp_transport_update_rto(struct sctp_transport *, __u32);
@@ -1025,7 +1024,7 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
 void sctp_transport_immediate_rtx(struct sctp_transport *);
 void sctp_transport_dst_release(struct sctp_transport *t);
 void sctp_transport_dst_confirm(struct sctp_transport *t);
-bool sctp_transport_pl_send(struct sctp_transport *t);
+void sctp_transport_pl_send(struct sctp_transport *t);
 bool sctp_transport_pl_recv(struct sctp_transport *t);
 
 
-- 
cgit v1.2.3


From 0bd28476f6363c7ccc841fe6a0ab0dd1fdb822f6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 24 Nov 2021 12:24:45 -0800
Subject: gro: optimize skb_gro_postpull_rcsum()

We can leverage third argument to csum_partial():

  X = csum_sub(X, csum_partial(start, len, 0));

  -->

  X = csum_add(X, ~csum_partial(start, len, 0));

  -->

  X = ~csum_partial(start, len, ~X);

This removes one add/adc pair and its dependency against the carry flag.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/gro.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/gro.h b/include/net/gro.h
index 9c22a010369c..b1139fca7c43 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -173,8 +173,8 @@ static inline void skb_gro_postpull_rcsum(struct sk_buff *skb,
 					const void *start, unsigned int len)
 {
 	if (NAPI_GRO_CB(skb)->csum_valid)
-		NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum,
-						  csum_partial(start, len, 0));
+		NAPI_GRO_CB(skb)->csum = ~csum_partial(start, len,
+						       ~NAPI_GRO_CB(skb)->csum);
 }
 
 /* GRO checksum functions. These are logical equivalents of the normal
-- 
cgit v1.2.3


From 29c3002644bdd653f6ec6407d25135d0a4f7cefb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 24 Nov 2021 12:24:46 -0800
Subject: net: optimize skb_postpull_rcsum()

Remove one pair of add/adc instructions and their dependency
against carry flag.

We can leverage third argument to csum_partial():

  X = csum_block_sub(X, csum_partial(start, len, 0), 0);

  -->

  X = csum_block_add(X, ~csum_partial(start, len, 0), 0);

  -->

  X = ~csum_partial(start, len, ~X);

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index eba256af64a5..eae4bd3237a4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3485,7 +3485,11 @@ __skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len,
 static inline void skb_postpull_rcsum(struct sk_buff *skb,
 				      const void *start, unsigned int len)
 {
-	__skb_postpull_rcsum(skb, start, len, 0);
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = ~csum_partial(start, len, ~skb->csum);
+	else if (skb->ip_summed == CHECKSUM_PARTIAL &&
+		 skb_checksum_start_offset(skb) < 0)
+		skb->ip_summed = CHECKSUM_NONE;
 }
 
 static __always_inline void
-- 
cgit v1.2.3


From ce8ce31b2c5c8b18667784b8c515650c65d57b4e Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 15 Nov 2021 15:18:04 +0100
Subject: crypto: drbg - prepare for more fine-grained tracking of seeding
 state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are two different randomness sources the DRBGs are getting seeded
from, namely the jitterentropy source (if enabled) and get_random_bytes().
At initial DRBG seeding time during boot, the latter might not have
collected sufficient entropy for seeding itself yet and thus, the DRBG
implementation schedules a reseed work from a random_ready_callback once
that has happened. This is particularly important for the !->pr DRBG
instances, for which (almost) no further reseeds are getting triggered
during their lifetime.

Because collecting data from the jitterentropy source is a rather expensive
operation, the aforementioned asynchronously scheduled reseed work
restricts itself to get_random_bytes() only. That is, it in some sense
amends the initial DRBG seed derived from jitterentropy output at full
(estimated) entropy with fresh randomness obtained from get_random_bytes()
once that has been seeded with sufficient entropy itself.

With the advent of rng_is_initialized(), there is no real need for doing
the reseed operation from an asynchronously scheduled work anymore and a
subsequent patch will make it synchronous by moving it next to related
logic already present in drbg_generate().

However, for tracking whether a full reseed including the jitterentropy
source is required or a "partial" reseed involving only get_random_bytes()
would be sufficient already, the boolean struct drbg_state's ->seeded
member must become a tristate value.

Prepare for this by introducing the new enum drbg_seed_state and change
struct drbg_state's ->seeded member's type from bool to that type.

For facilitating review, enum drbg_seed_state is made to only contain
two members corresponding to the former ->seeded values of false and true
resp. at this point: DRBG_SEED_STATE_UNSEEDED and DRBG_SEED_STATE_FULL. A
third one for tracking the intermediate state of "seeded from jitterentropy
only" will be introduced with a subsequent patch.

There is no change in behaviour at this point.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Stephan Müller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/drbg.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h
index c4165126937e..92a87b23ad2f 100644
--- a/include/crypto/drbg.h
+++ b/include/crypto/drbg.h
@@ -105,6 +105,11 @@ struct drbg_test_data {
 	struct drbg_string *testentropy; /* TEST PARAMETER: test entropy */
 };
 
+enum drbg_seed_state {
+	DRBG_SEED_STATE_UNSEEDED,
+	DRBG_SEED_STATE_FULL,
+};
+
 struct drbg_state {
 	struct mutex drbg_mutex;	/* lock around DRBG */
 	unsigned char *V;	/* internal state 10.1.1.1 1a) */
@@ -127,7 +132,7 @@ struct drbg_state {
 	struct crypto_wait ctr_wait;		/* CTR mode async wait obj */
 	struct scatterlist sg_in, sg_out;	/* CTR mode SGLs */
 
-	bool seeded;		/* DRBG fully seeded? */
+	enum drbg_seed_state seeded;		/* DRBG fully seeded? */
 	bool pr;		/* Prediction resistance enabled? */
 	bool fips_primed;	/* Continuous test primed? */
 	unsigned char *prev;	/* FIPS 140-2 continuous test value */
-- 
cgit v1.2.3


From 2bcd25443868aa8863779a6ebc6c9319633025d2 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 15 Nov 2021 15:18:05 +0100
Subject: crypto: drbg - track whether DRBG was seeded with
 !rng_is_initialized()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, the DRBG implementation schedules asynchronous works from
random_ready_callbacks for reseeding the DRBG instances with output from
get_random_bytes() once the latter has sufficient entropy available.

However, as the get_random_bytes() initialization state can get queried by
means of rng_is_initialized() now, there is no real need for this
asynchronous reseeding logic anymore and it's better to keep things simple
by doing it synchronously when needed instead, i.e. from drbg_generate()
once rng_is_initialized() has flipped to true.

Of course, for this to work, drbg_generate() would need some means by which
it can tell whether or not rng_is_initialized() has flipped to true since
the last seeding from get_random_bytes(). Or equivalently, whether or not
the last seed from get_random_bytes() has happened when
rng_is_initialized() was still evaluating to false.

As it currently stands, enum drbg_seed_state allows for the representation
of two different DRBG seeding states: DRBG_SEED_STATE_UNSEEDED and
DRBG_SEED_STATE_FULL. The former makes drbg_generate() to invoke a full
reseeding operation involving both, the rather expensive jitterentropy as
well as the get_random_bytes() randomness sources. The DRBG_SEED_STATE_FULL
state on the other hand implies that no reseeding at all is required for a
!->pr DRBG variant.

Introduce the new DRBG_SEED_STATE_PARTIAL state to enum drbg_seed_state for
representing the condition that a DRBG was being seeded when
rng_is_initialized() had still been false. In particular, this new state
implies that
- the given DRBG instance has been fully seeded from the jitterentropy
  source (if enabled)
- and drbg_generate() is supposed to reseed from get_random_bytes()
  *only* once rng_is_initialized() turns to true.

Up to now, the __drbg_seed() helper used to set the given DRBG instance's
->seeded state to constant DRBG_SEED_STATE_FULL. Introduce a new argument
allowing for the specification of the to be written ->seeded value instead.
Make the first of its two callers, drbg_seed(), determine the appropriate
value based on rng_is_initialized(). The remaining caller,
drbg_async_seed(), is known to get invoked only once rng_is_initialized()
is true, hence let it pass constant DRBG_SEED_STATE_FULL for the new
argument to __drbg_seed().

There is no change in behaviour, except for that the pr_devel() in
drbg_generate() would now report "unseeded" for ->pr DRBG instances which
had last been seeded when rng_is_initialized() was still evaluating to
false.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Stephan Müller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/drbg.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h
index 92a87b23ad2f..3ebdb1effe74 100644
--- a/include/crypto/drbg.h
+++ b/include/crypto/drbg.h
@@ -107,6 +107,7 @@ struct drbg_test_data {
 
 enum drbg_seed_state {
 	DRBG_SEED_STATE_UNSEEDED,
+	DRBG_SEED_STATE_PARTIAL, /* Seeded with !rng_is_initialized() */
 	DRBG_SEED_STATE_FULL,
 };
 
-- 
cgit v1.2.3


From 074bcd4000e0d812bc253f86fedc40f81ed59ccc Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 15 Nov 2021 15:18:07 +0100
Subject: crypto: drbg - make reseeding from get_random_bytes() synchronous

get_random_bytes() usually hasn't full entropy available by the time DRBG
instances are first getting seeded from it during boot. Thus, the DRBG
implementation registers random_ready_callbacks which would in turn
schedule some work for reseeding the DRBGs once get_random_bytes() has
sufficient entropy available.

For reference, the relevant history around handling DRBG (re)seeding in
the context of a not yet fully seeded get_random_bytes() is:

  commit 16b369a91d0d ("random: Blocking API for accessing
                        nonblocking_pool")
  commit 4c7879907edd ("crypto: drbg - add async seeding operation")

  commit 205a525c3342 ("random: Add callback API for random pool
                        readiness")
  commit 57225e679788 ("crypto: drbg - Use callback API for random
                        readiness")
  commit c2719503f5e1 ("random: Remove kernel blocking API")

However, some time later, the initialization state of get_random_bytes()
has been made queryable via rng_is_initialized() introduced with commit
9a47249d444d ("random: Make crng state queryable"). This primitive now
allows for streamlining the DRBG reseeding from get_random_bytes() by
replacing that aforementioned asynchronous work scheduling from
random_ready_callbacks with some simpler, synchronous code in
drbg_generate() next to the related logic already present therein. Apart
from improving overall code readability, this change will also enable DRBG
users to rely on wait_for_random_bytes() for ensuring that the initial
seeding has completed, if desired.

The previous patches already laid the grounds by making drbg_seed() to
record at each DRBG instance whether it was being seeded at a time when
rng_is_initialized() still had been false as indicated by
->seeded == DRBG_SEED_STATE_PARTIAL.

All that remains to be done now is to make drbg_generate() check for this
condition, determine whether rng_is_initialized() has flipped to true in
the meanwhile and invoke a reseed from get_random_bytes() if so.

Make this move:
- rename the former drbg_async_seed() work handler, i.e. the one in charge
  of reseeding a DRBG instance from get_random_bytes(), to
  "drbg_seed_from_random()",
- change its signature as appropriate, i.e. make it take a struct
  drbg_state rather than a work_struct and change its return type from
  "void" to "int" in order to allow for passing error information from
  e.g. its __drbg_seed() invocation onwards to callers,
- make drbg_generate() invoke this drbg_seed_from_random() once it
  encounters a DRBG instance with ->seeded == DRBG_SEED_STATE_PARTIAL by
  the time rng_is_initialized() has flipped to true and
- prune everything related to the former, random_ready_callback based
  mechanism.

As drbg_seed_from_random() is now getting invoked from drbg_generate() with
the ->drbg_mutex being held, it must not attempt to recursively grab it
once again. Remove the corresponding mutex operations from what is now
drbg_seed_from_random(). Furthermore, as drbg_seed_from_random() can now
report errors directly to its caller, there's no need for it to temporarily
switch the DRBG's ->seeded state to DRBG_SEED_STATE_UNSEEDED so that a
failure of the subsequently invoked __drbg_seed() will get signaled to
drbg_generate(). Don't do it then.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/drbg.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h
index 3ebdb1effe74..a6c3b8e7deb6 100644
--- a/include/crypto/drbg.h
+++ b/include/crypto/drbg.h
@@ -137,12 +137,10 @@ struct drbg_state {
 	bool pr;		/* Prediction resistance enabled? */
 	bool fips_primed;	/* Continuous test primed? */
 	unsigned char *prev;	/* FIPS 140-2 continuous test value */
-	struct work_struct seed_work;	/* asynchronous seeding support */
 	struct crypto_rng *jent;
 	const struct drbg_state_ops *d_ops;
 	const struct drbg_core *core;
 	struct drbg_string test_data;
-	struct random_ready_callback random_ready;
 };
 
 static inline __u8 drbg_statelen(struct drbg_state *drbg)
-- 
cgit v1.2.3


From 8ea5ee00beb925d2aa0fed0eb3faf04715a3f2bd Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 15 Nov 2021 15:18:09 +0100
Subject: crypto: drbg - reseed 'nopr' drbgs periodically from
 get_random_bytes()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In contrast to the fully prediction resistant 'pr' DRBGs, the 'nopr'
variants get seeded once at boot and reseeded only rarely thereafter,
namely only after 2^20 requests have been served each. AFAICT, this
reseeding based on the number of requests served is primarily motivated
by information theoretic considerations, c.f. NIST SP800-90Ar1,
sec. 8.6.8 ("Reseeding").

However, given the relatively large seed lifetime of 2^20 requests, the
'nopr' DRBGs can hardly be considered to provide any prediction resistance
whatsoever, i.e. to protect against threats like side channel leaks of the
internal DRBG state (think e.g. leaked VM snapshots). This is expected and
completely in line with the 'nopr' naming, but as e.g. the
"drbg_nopr_hmac_sha512" implementation is potentially being used for
providing the "stdrng" and thus, the crypto_default_rng serving the
in-kernel crypto, it would certainly be desirable to achieve at least the
same level of prediction resistance as get_random_bytes() does.

Note that the chacha20 rngs underlying get_random_bytes() get reseeded
every CRNG_RESEED_INTERVAL == 5min: the secondary, per-NUMA node rngs from
the primary one and the primary rng in turn from the entropy pool, provided
sufficient entropy is available.

The 'nopr' DRBGs do draw randomness from get_random_bytes() for their
initial seed already, so making them to reseed themselves periodically from
get_random_bytes() in order to let them benefit from the latter's
prediction resistance is not such a big change conceptually.

In principle, it would have been also possible to make the 'nopr' DRBGs to
periodically invoke a full reseeding operation, i.e. to also consider the
jitterentropy source (if enabled) in addition to get_random_bytes() for the
seed value. However, get_random_bytes() is relatively lightweight as
compared to the jitterentropy generation process and thus, even though the
'nopr' reseeding is supposed to get invoked infrequently, it's IMO still
worthwhile to avoid occasional latency spikes for drbg_generate() and
stick to get_random_bytes() only. As an additional remark, note that
drawing randomness from the non-SP800-90B-conforming get_random_bytes()
only won't adversely affect SP800-90A conformance either: the very same is
being done during boot via drbg_seed_from_random() already once
rng_is_initialized() flips to true and it follows that if the DRBG
implementation does conform to SP800-90A now, it will continue to do so.

Make the 'nopr' DRBGs to reseed themselves periodically from
get_random_bytes() every CRNG_RESEED_INTERVAL == 5min.

More specifically, introduce a new member ->last_seed_time to struct
drbg_state for recording in units of jiffies when the last seeding
operation had taken place. Make __drbg_seed() maintain it and let
drbg_generate() invoke a reseed from get_random_bytes() via
drbg_seed_from_random() if more than 5min have passed by since the last
seeding operation. Be careful to not to reseed if in testing mode though,
or otherwise the drbg related tests in crypto/testmgr.c would fail to
reproduce the expected output.

In order to keep the formatting clean in drbg_generate() wrap the logic
for deciding whether or not a reseed is due in a new helper,
drbg_nopr_reseed_interval_elapsed().

Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Stephan Müller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/drbg.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h
index a6c3b8e7deb6..af5ad51d3eef 100644
--- a/include/crypto/drbg.h
+++ b/include/crypto/drbg.h
@@ -134,6 +134,7 @@ struct drbg_state {
 	struct scatterlist sg_in, sg_out;	/* CTR mode SGLs */
 
 	enum drbg_seed_state seeded;		/* DRBG fully seeded? */
+	unsigned long last_seed_time;
 	bool pr;		/* Prediction resistance enabled? */
 	bool fips_primed;	/* Continuous test primed? */
 	unsigned char *prev;	/* FIPS 140-2 continuous test value */
-- 
cgit v1.2.3


From b808f32023dd8127b0fa27f60fa69a959fd70388 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stephan=20M=C3=BCller?= <smueller@chronox.de>
Date: Fri, 19 Nov 2021 07:55:33 +0100
Subject: crypto: kdf - Add key derivation self-test support code

As a preparation to add the key derivation implementations, the
self-test data structure definition and the common test code is made
available.

The test framework follows the testing applied by the NIST CAVP test
approach.

The structure of the test code follows the implementations found in
crypto/testmgr.c|h. In case the KDF implementations will be made
available via a kernel crypto API templates, the test code is intended
to be merged into testmgr.c|h.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/kdf_selftest.h | 71 ++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 include/crypto/internal/kdf_selftest.h

(limited to 'include')

diff --git a/include/crypto/internal/kdf_selftest.h b/include/crypto/internal/kdf_selftest.h
new file mode 100644
index 000000000000..4d03d2af57b7
--- /dev/null
+++ b/include/crypto/internal/kdf_selftest.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (C) 2021, Stephan Mueller <smueller@chronox.de>
+ */
+
+#ifndef _CRYPTO_KDF_SELFTEST_H
+#define _CRYPTO_KDF_SELFTEST_H
+
+#include <crypto/hash.h>
+#include <linux/uio.h>
+
+struct kdf_testvec {
+	unsigned char *key;
+	size_t keylen;
+	unsigned char *ikm;
+	size_t ikmlen;
+	struct kvec info;
+	unsigned char *expected;
+	size_t expectedlen;
+};
+
+static inline int
+kdf_test(const struct kdf_testvec *test, const char *name,
+	 int (*crypto_kdf_setkey)(struct crypto_shash *kmd,
+				  const u8 *key, size_t keylen,
+				  const u8 *ikm, size_t ikmlen),
+	 int (*crypto_kdf_generate)(struct crypto_shash *kmd,
+				    const struct kvec *info,
+				    unsigned int info_nvec,
+				    u8 *dst, unsigned int dlen))
+{
+	struct crypto_shash *kmd;
+	int ret;
+	u8 *buf = kzalloc(test->expectedlen, GFP_KERNEL);
+
+	if (!buf)
+		return -ENOMEM;
+
+	kmd = crypto_alloc_shash(name, 0, 0);
+	if (IS_ERR(kmd)) {
+		pr_err("alg: kdf: could not allocate hash handle for %s\n",
+		       name);
+		kfree(buf);
+		return -ENOMEM;
+	}
+
+	ret = crypto_kdf_setkey(kmd, test->key, test->keylen,
+				test->ikm, test->ikmlen);
+	if (ret) {
+		pr_err("alg: kdf: could not set key derivation key\n");
+		goto err;
+	}
+
+	ret = crypto_kdf_generate(kmd, &test->info, 1, buf, test->expectedlen);
+	if (ret) {
+		pr_err("alg: kdf: could not obtain key data\n");
+		goto err;
+	}
+
+	ret = memcmp(test->expected, buf, test->expectedlen);
+	if (ret)
+		ret = -EINVAL;
+
+err:
+	crypto_free_shash(kmd);
+	kfree(buf);
+	return ret;
+}
+
+#endif /* _CRYPTO_KDF_SELFTEST_H */
-- 
cgit v1.2.3


From 026a733e66592e743a0905c7fd6b5d3bf89b2d7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stephan=20M=C3=BCller?= <smueller@chronox.de>
Date: Fri, 19 Nov 2021 07:55:58 +0100
Subject: crypto: kdf - add SP800-108 counter key derivation function

SP800-108 defines three KDFs - this patch provides the counter KDF
implementation.

The KDF is implemented as a service function where the caller has to
maintain the hash / HMAC state. Apart from this hash/HMAC state, no
additional state is required to be maintained by either the caller or
the KDF implementation.

The key for the KDF is set with the crypto_kdf108_setkey function which
is intended to be invoked before the caller requests a key derivation
operation via crypto_kdf108_ctr_generate.

SP800-108 allows the use of either a HMAC or a hash as crypto primitive
for the KDF. When a HMAC primtive is intended to be used,
crypto_kdf108_setkey must be used to set the HMAC key. Otherwise, for a
hash crypto primitve crypto_kdf108_ctr_generate can be used immediately
after allocating the hash handle.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/kdf_sp800108.h | 61 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 include/crypto/kdf_sp800108.h

(limited to 'include')

diff --git a/include/crypto/kdf_sp800108.h b/include/crypto/kdf_sp800108.h
new file mode 100644
index 000000000000..b7b20a778fb7
--- /dev/null
+++ b/include/crypto/kdf_sp800108.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (C) 2021, Stephan Mueller <smueller@chronox.de>
+ */
+
+#ifndef _CRYPTO_KDF108_H
+#define _CRYPTO_KDF108_H
+
+#include <crypto/hash.h>
+#include <linux/uio.h>
+
+/**
+ * Counter KDF generate operation according to SP800-108 section 5.1
+ * as well as SP800-56A section 5.8.1 (Single-step KDF).
+ *
+ * @kmd Keyed message digest whose key was set with crypto_kdf108_setkey or
+ *	unkeyed message digest
+ * @info optional context and application specific information - this may be
+ *	 NULL
+ * @info_vec number of optional context/application specific information entries
+ * @dst destination buffer that the caller already allocated
+ * @dlen length of the destination buffer - the KDF derives that amount of
+ *	 bytes.
+ *
+ * To comply with SP800-108, the caller must provide Label || 0x00 || Context
+ * in the info parameter.
+ *
+ * @return 0 on success, < 0 on error
+ */
+int crypto_kdf108_ctr_generate(struct crypto_shash *kmd,
+			       const struct kvec *info, unsigned int info_nvec,
+			       u8 *dst, unsigned int dlen);
+
+/**
+ * Counter KDF setkey operation
+ *
+ * @kmd Keyed message digest allocated by the caller. The key should not have
+ *	been set.
+ * @key Seed key to be used to initialize the keyed message digest context.
+ * @keylen This length of the key buffer.
+ * @ikm The SP800-108 KDF does not support IKM - this parameter must be NULL
+ * @ikmlen This parameter must be 0.
+ *
+ * According to SP800-108 section 7.2, the seed key must be at least as large as
+ * the message digest size of the used keyed message digest. This limitation
+ * is enforced by the implementation.
+ *
+ * SP800-108 allows the use of either a HMAC or a hash primitive. When
+ * the caller intends to use a hash primitive, the call to
+ * crypto_kdf108_setkey is not required and the key derivation operation can
+ * immediately performed using crypto_kdf108_ctr_generate after allocating
+ * a handle.
+ *
+ * @return 0 on success, < 0 on error
+ */
+int crypto_kdf108_setkey(struct crypto_shash *kmd,
+			 const u8 *key, size_t keylen,
+			 const u8 *ikm, size_t ikmlen);
+
+#endif /* _CRYPTO_KDF108_H */
-- 
cgit v1.2.3


From d787a3e38f01bfc4566df4e85d432a29d192e637 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 12 Nov 2021 12:22:23 +0100
Subject: mac80211: add support for .ndo_fill_forward_path

This allows drivers to provide a destination device + info for flow offload
Only supported in combination with 802.3 encap offload

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Tested-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/20211112112223.1209-1-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 775dbb982654..10e6fe215f0f 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3952,6 +3952,8 @@ struct ieee80211_prep_tx_info {
  *	radar channel.
  *	The caller is expected to set chandef pointer to NULL in order to
  *	disable offchannel CAC/radar detection.
+ * @net_fill_forward_path: Called from .ndo_fill_forward_path in order to
+ *	resolve a path for hardware flow offloading
  */
 struct ieee80211_ops {
 	void (*tx)(struct ieee80211_hw *hw,
@@ -4282,6 +4284,11 @@ struct ieee80211_ops {
 				     struct ieee80211_sta *sta, u8 flowid);
 	int (*set_radar_offchan)(struct ieee80211_hw *hw,
 				 struct cfg80211_chan_def *chandef);
+	int (*net_fill_forward_path)(struct ieee80211_hw *hw,
+				     struct ieee80211_vif *vif,
+				     struct ieee80211_sta *sta,
+				     struct net_device_path_ctx *ctx,
+				     struct net_device_path *path);
 };
 
 /**
-- 
cgit v1.2.3


From c47240cb46a10c40686ce4e25c64aaed676f71c9 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 16 Nov 2021 12:41:52 +0100
Subject: cfg80211: schedule offchan_cac_abort_wk in cfg80211_radar_event

If necessary schedule offchan_cac_abort_wk work in cfg80211_radar_event
routine adding offchan parameter to cfg80211_radar_event signature.
Rename cfg80211_radar_event in __cfg80211_radar_event and introduce
the two following inline helpers:
- cfg80211_radar_event
- cfg80211_offchan_radar_event
Doing so the drv will not need to run cfg80211_offchan_cac_abort() after
radar detection on the offchannel chain.

Tested-by: Owen Peng <owen.peng@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/3ff583e021e3343a3ced54a7b09b5e184d1880dc.1637062727.git.lorenzo@kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 362da9f6bf39..a887086cb103 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7605,15 +7605,33 @@ void cfg80211_cqm_txe_notify(struct net_device *dev, const u8 *peer,
 void cfg80211_cqm_beacon_loss_notify(struct net_device *dev, gfp_t gfp);
 
 /**
- * cfg80211_radar_event - radar detection event
+ * __cfg80211_radar_event - radar detection event
  * @wiphy: the wiphy
  * @chandef: chandef for the current channel
+ * @offchan: the radar has been detected on the offchannel chain
  * @gfp: context flags
  *
  * This function is called when a radar is detected on the current chanenl.
  */
-void cfg80211_radar_event(struct wiphy *wiphy,
-			  struct cfg80211_chan_def *chandef, gfp_t gfp);
+void __cfg80211_radar_event(struct wiphy *wiphy,
+			    struct cfg80211_chan_def *chandef,
+			    bool offchan, gfp_t gfp);
+
+static inline void
+cfg80211_radar_event(struct wiphy *wiphy,
+		     struct cfg80211_chan_def *chandef,
+		     gfp_t gfp)
+{
+	__cfg80211_radar_event(wiphy, chandef, false, gfp);
+}
+
+static inline void
+cfg80211_offchan_radar_event(struct wiphy *wiphy,
+			     struct cfg80211_chan_def *chandef,
+			     gfp_t gfp)
+{
+	__cfg80211_radar_event(wiphy, chandef, true, gfp);
+}
 
 /**
  * cfg80211_sta_opmode_change_notify - STA's ht/vht operation mode change event
-- 
cgit v1.2.3


From fb5f6a0e8063b7a84d6d44ef353846ccd7708d2e Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 18 Nov 2021 12:38:39 -0800
Subject: mac80211: Use memset_after() to clear tx status

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memset(), avoid intentionally writing across
neighboring fields.

Use memset_after() so memset() doesn't get confused about writing
beyond the destination member that is intended to be the starting point
of zeroing through the end of the struct.

Additionally fix the common helper, ieee80211_tx_info_clear_status(),
which was not clearing ack_signal, but the open-coded versions
did. Johannes Berg points out this bug was introduced by commit
e3e1a0bcb3f1 ("mac80211: reduce IEEE80211_TX_MAX_RATES") but was harmless.

Also drops the associated unneeded BUILD_BUG_ON()s, and adds a note to
carl9170 about usage.

Signed-off-by: Kees Cook <keescook@chromium.org>
Tested-by: Christian Lamparter <chunkeey@gmail.com> [both CARL9170+P54USB on real HW]
Link: https://lore.kernel.org/r/20211118203839.1289276-1-keescook@chromium.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 10e6fe215f0f..e349f57d19ae 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1205,12 +1205,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
 	/* clear the rate counts */
 	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++)
 		info->status.rates[i].count = 0;
-
-	BUILD_BUG_ON(
-	    offsetof(struct ieee80211_tx_info, status.ack_signal) != 20);
-	memset(&info->status.ampdu_ack_len, 0,
-	       sizeof(struct ieee80211_tx_info) -
-	       offsetof(struct ieee80211_tx_info, status.ampdu_ack_len));
+	memset_after(&info->status, 0, rates);
 }
 
 
-- 
cgit v1.2.3


From a0f84dfb3f6d9f78f862cbe885036d3e4449fc6f Mon Sep 17 00:00:00 2001
From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Date: Thu, 25 Nov 2021 12:15:19 +0200
Subject: ASoC: SOF: IPC: dai: Expand DAI_CONFIG IPC flags

Some DAI components, such as HDaudio, need to be stopped in two steps
a) stop the DAI component
b) stop the DAI DMA

This patch enables this two-step stop by expanding the DAI_CONFIG
IPC flags and split them into 2 parts.

The 4 LSB bits indicate when the DAI_CONFIG IPC is sent, ex: hw_params,
hw_free or pause. The 4 MSB bits are used as the quirk flags to be used
along with the command flags. The quirk flag called
SOF_DAI_CONFIG_FLAGS_2_STEP_STOP shall be set along with the HW_PARAMS
command flag, i.e. before the pipeline is started so that the stop/pause
trigger op in the FW can take the appropriate action to either
perform/skip the DMA stop. If set, the DMA stop will be executed when
the DAI_CONFIG IPC is sent during hw_free. In the case of pause, DMA
pause will be handled when the DAI_CONFIG IPC is sent with the PAUSE
command flag.

Along with this, modify the signature for the hda_ctrl_dai_widget_setup/
hda_ctrl_dai_widget_free() functions to take additional flags as an
argument and modify all users to pass the appropriate quirk flags. Only
the HDA DAI's need to pass the SOF_DAI_CONFIG_FLAGS_2_STEP_STOP quirk
flag during hw_params to indicate that it supports two-step stop and
pause.

Signed-off-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Link: https://lore.kernel.org/r/20211125101520.291581-10-kai.vehmanen@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sof/dai.h | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h
index 5132bc60f54b..59ee50ac7705 100644
--- a/include/sound/sof/dai.h
+++ b/include/sound/sof/dai.h
@@ -52,12 +52,25 @@
 #define SOF_DAI_FMT_INV_MASK		0x0f00
 #define SOF_DAI_FMT_CLOCK_PROVIDER_MASK	0xf000
 
-/* DAI_CONFIG flags */
-#define SOF_DAI_CONFIG_FLAGS_MASK	0x3
-#define SOF_DAI_CONFIG_FLAGS_NONE	(0 << 0) /**< DAI_CONFIG sent without stage information */
-#define SOF_DAI_CONFIG_FLAGS_HW_PARAMS	(1 << 0) /**< DAI_CONFIG sent during hw_params stage */
-#define SOF_DAI_CONFIG_FLAGS_HW_FREE	(2 << 0) /**< DAI_CONFIG sent during hw_free stage */
-#define SOF_DAI_CONFIG_FLAGS_RFU	(3 << 0) /**< not used, reserved for future use */
+/*
+ * DAI_CONFIG flags. The 4 LSB bits are used for the commands, HW_PARAMS, HW_FREE and PAUSE
+ * representing when the IPC is sent. The 4 MSB bits are used to add quirks along with the above
+ * commands.
+ */
+#define SOF_DAI_CONFIG_FLAGS_CMD_MASK	0xF
+#define SOF_DAI_CONFIG_FLAGS_NONE	0 /**< DAI_CONFIG sent without stage information */
+#define SOF_DAI_CONFIG_FLAGS_HW_PARAMS	BIT(0) /**< DAI_CONFIG sent during hw_params stage */
+#define SOF_DAI_CONFIG_FLAGS_HW_FREE	BIT(1) /**< DAI_CONFIG sent during hw_free stage */
+/**< DAI_CONFIG sent during pause trigger. Only available ABI 3.20 onwards */
+#define SOF_DAI_CONFIG_FLAGS_PAUSE	BIT(2)
+#define SOF_DAI_CONFIG_FLAGS_QUIRK_SHIFT 4
+#define SOF_DAI_CONFIG_FLAGS_QUIRK_MASK  (0xF << SOF_DAI_CONFIG_FLAGS_QUIRK_SHIFT)
+/*
+ * This should be used along with the SOF_DAI_CONFIG_FLAGS_HW_PARAMS to indicate that pipeline
+ * stop/pause and DAI DMA stop/pause should happen in two steps. This change is only available
+ * ABI 3.20 onwards.
+ */
+#define SOF_DAI_CONFIG_FLAGS_2_STEP_STOP BIT(0)
 
 /** \brief Types of DAI */
 enum sof_ipc_dai_type {
-- 
cgit v1.2.3


From b4c80629c5c9d48880c5ad99943374f9ab72432e Mon Sep 17 00:00:00 2001
From: Heinrich Schuchardt <xypron.glpk@gmx.de>
Date: Sun, 23 May 2021 22:49:57 +0200
Subject: include/linux/byteorder/generic.h: fix index variables

In cpu_to_be32_array() and be32_to_cpu_array() the length of the array is
given by variable len of type size_t. An index variable of type int is used
to iterate over the array. This is bound to fail for len > INT_MAX and
lets GCC add instructions for sign extension.

Correct the type of the index variable.

Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
Link: https://lore.kernel.org/r/20210523204958.64575-1-xypron.glpk@gmx.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/byteorder/generic.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h
index 4b13e0a3e15b..c9a4c96c9943 100644
--- a/include/linux/byteorder/generic.h
+++ b/include/linux/byteorder/generic.h
@@ -190,7 +190,7 @@ static inline void be64_add_cpu(__be64 *var, u64 val)
 
 static inline void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len)
 {
-	int i;
+	size_t i;
 
 	for (i = 0; i < len; i++)
 		dst[i] = cpu_to_be32(src[i]);
@@ -198,7 +198,7 @@ static inline void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len)
 
 static inline void be32_to_cpu_array(u32 *dst, const __be32 *src, size_t len)
 {
-	int i;
+	size_t i;
 
 	for (i = 0; i < len; i++)
 		dst[i] = be32_to_cpu(src[i]);
-- 
cgit v1.2.3


From 18e6c0751cf9ae0631a2623e31af2bf504f72c30 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Fri, 26 Nov 2021 09:15:49 +0100
Subject: tty: finish kernel-doc of tty_struct members

There are already pieces of kernel-doc documentation for struct
tty_struct in tty.h. Finish the documentation for the members which were
undocumented yet.

It also includes tuning the already existing pieces like flow and ctrl,
especially adding highlights to them.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20211126081611.11001-2-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty.h | 79 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 64 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 5dbd7c5afac7..da49ad9be281 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -122,33 +122,84 @@ struct tty_operations;
 /**
  * struct tty_struct - state associated with a tty while open
  *
- * @flow.lock: lock for flow members
- * @flow.stopped: tty stopped/started by tty_stop/tty_start
- * @flow.tco_stopped: tty stopped/started by TCOOFF/TCOON ioctls (it has
- *		      precedense over @flow.stopped)
+ * @magic: magic value set early in @alloc_tty_struct to %TTY_MAGIC, for
+ *	   debugging purposes
+ * @kref: reference counting by tty_kref_get() and tty_kref_put(), reaching zero
+ *	  frees the structure
+ * @dev: class device or %NULL (e.g. ptys, serdev)
+ * @driver: &struct tty_driver operating this tty
+ * @ops: &struct tty_operations of @driver for this tty (open, close, etc.)
+ * @index: index of this tty (e.g. to construct @name like tty12)
+ * @ldisc_sem: protects line discipline changes (@ldisc) -- lock tty not pty
+ * @ldisc: the current line discipline for this tty (n_tty by default)
+ * @atomic_write_lock: protects against concurrent writers, i.e. locks
+ *		       @write_cnt, @write_buf and similar
+ * @legacy_mutex: leftover from history (BKL -> BTM -> @legacy_mutex),
+ *		  protecting several operations on this tty
+ * @throttle_mutex: protects against concurrent tty_throttle_safe() and
+ *		    tty_unthrottle_safe() (but not tty_unthrottle())
+ * @termios_rwsem: protects @termios and @termios_locked
+ * @winsize_mutex: protects @winsize
+ * @termios: termios for the current tty, copied from/to @driver.termios
+ * @termios_locked: locked termios (by %TIOCGLCKTRMIOS and %TIOCSLCKTRMIOS
+ *		    ioctls)
+ * @name: name of the tty constructed by tty_line_name() (e.g. ttyS3)
+ * @flags: bitwise OR of %TTY_THROTTLED, %TTY_IO_ERROR, ...
+ * @count: count of open processes, reaching zero cancels all the work for
+ *	   this tty and drops a @kref too (but does not free this tty)
+ * @winsize: size of the terminal "window" (cf. @winsize_mutex)
+ * @flow: flow settings grouped together, see also @flow.unused
+ * @flow.lock: lock for @flow members
+ * @flow.stopped: tty stopped/started by stop_tty()/start_tty()
+ * @flow.tco_stopped: tty stopped/started by %TCOOFF/%TCOON ioctls (it has
+ *		      precedence over @flow.stopped)
  * @flow.unused: alignment for Alpha, so that no members other than @flow.* are
  *		 modified by the same 64b word store. The @flow's __aligned is
  *		 there for the very same reason.
- * @ctrl.lock: lock for ctrl members
+ * @ctrl: control settings grouped together, see also @ctrl.unused
+ * @ctrl.lock: lock for @ctrl members
  * @ctrl.pgrp: process group of this tty (setpgrp(2))
  * @ctrl.session: session of this tty (setsid(2)). Writes are protected by both
- *		  @ctrl.lock and legacy mutex, readers must use at least one of
+ *		  @ctrl.lock and @legacy_mutex, readers must use at least one of
  *		  them.
- * @ctrl.pktstatus: packet mode status (bitwise OR of TIOCPKT_* constants)
+ * @ctrl.pktstatus: packet mode status (bitwise OR of %TIOCPKT_ constants)
  * @ctrl.packet: packet mode enabled
+ * @ctrl.unused: alignment for Alpha, see @flow.unused for explanation
+ * @hw_stopped: not controlled by the tty layer, under @driver's control for CTS
+ *		handling
+ * @receive_room: bytes permitted to feed to @ldisc without any being lost
+ * @flow_change: controls behavior of throttling, see tty_throttle_safe() and
+ *		 tty_unthrottle_safe()
+ * @link: link to another pty (master -> slave and vice versa)
+ * @fasync: state for %O_ASYNC (for %SIGIO); managed by fasync_helper()
+ * @write_wait: concurrent writers are waiting in this queue until they are
+ *		allowed to write
+ * @read_wait: readers wait for data in this queue
+ * @hangup_work: normally a work to perform a hangup (do_tty_hangup()); while
+ *		 freeing the tty, (re)used to release_one_tty()
+ * @disc_data: pointer to @ldisc's private data (e.g. to &struct n_tty_data)
+ * @driver_data: pointer to @driver's private data (e.g. &struct uart_state)
+ * @files_lock:	protects @tty_files list
+ * @tty_files: list of (re)openers of this tty (i.e. linked &struct
+ *	       tty_file_private)
+ * @closing: when set during close, n_tty processes only START & STOP chars
+ * @write_buf: temporary buffer used during tty_write() to copy user data to
+ * @write_cnt: count of bytes written in tty_write() to @write_buf
+ * @SAK_work: if the tty has a pending do_SAK, it is queued here
+ * @port: persistent storage for this device (i.e. &struct tty_port)
  *
  * All of the state associated with a tty while the tty is open. Persistent
- * storage for tty devices is referenced here as @port in struct tty_port.
+ * storage for tty devices is referenced here as @port and is documented in
+ * &struct tty_port.
  */
 struct tty_struct {
 	int	magic;
 	struct kref kref;
-	struct device *dev;	/* class device or NULL (e.g. ptys, serdev) */
+	struct device *dev;
 	struct tty_driver *driver;
 	const struct tty_operations *ops;
 	int index;
 
-	/* Protects ldisc changes: Lock tty not pty */
 	struct ld_semaphore ldisc_sem;
 	struct tty_ldisc *ldisc;
 
@@ -157,12 +208,11 @@ struct tty_struct {
 	struct mutex throttle_mutex;
 	struct rw_semaphore termios_rwsem;
 	struct mutex winsize_mutex;
-	/* Termios values are protected by the termios rwsem */
 	struct ktermios termios, termios_locked;
 	char name[64];
 	unsigned long flags;
 	int count;
-	struct winsize winsize;		/* winsize_mutex */
+	struct winsize winsize;
 
 	struct {
 		spinlock_t lock;
@@ -181,7 +231,7 @@ struct tty_struct {
 	} __aligned(sizeof(unsigned long)) ctrl;
 
 	int hw_stopped;
-	unsigned int receive_room;	/* Bytes free for queue */
+	unsigned int receive_room;
 	int flow_change;
 
 	struct tty_struct *link;
@@ -191,7 +241,7 @@ struct tty_struct {
 	struct work_struct hangup_work;
 	void *disc_data;
 	void *driver_data;
-	spinlock_t files_lock;		/* protects tty_files list */
+	spinlock_t files_lock;
 	struct list_head tty_files;
 
 #define N_TTY_BUF_SIZE 4096
@@ -199,7 +249,6 @@ struct tty_struct {
 	int closing;
 	unsigned char *write_buf;
 	int write_cnt;
-	/* If the tty has a pending do_SAK, queue it here - akpm */
 	struct work_struct SAK_work;
 	struct tty_port *port;
 } __randomize_layout;
-- 
cgit v1.2.3


From 61c83addb77c65f498a8db0e7113dc3acf753c45 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Fri, 26 Nov 2021 09:15:50 +0100
Subject: tty: add kernel-doc for tty_port

tty_port used to have only short comments along its members. Convert
them into proper kernel-doc comments in front of the structure. And add
some more explanation to them where needed.

The whole structure purpose and handling is documented at the end too --
some pieces of preexisting text moved to this place.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20211126081611.11001-3-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty_port.h | 104 +++++++++++++++++++++++++++++++----------------
 1 file changed, 70 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h
index 6e86e9e118b6..9091e1c8de4c 100644
--- a/include/linux/tty_port.h
+++ b/include/linux/tty_port.h
@@ -7,17 +7,6 @@
 #include <linux/tty_buffer.h>
 #include <linux/wait.h>
 
-/*
- * Port level information. Each device keeps its own port level information
- * so provide a common structure for those ports wanting to use common support
- * routines.
- *
- * The tty port has a different lifetime to the tty so must be kept apart.
- * In addition be careful as tty -> port mappings are valid for the life
- * of the tty object but in many cases port -> tty mappings are valid only
- * until a hangup so don't use the wrong path.
- */
-
 struct attribute_group;
 struct tty_driver;
 struct tty_port;
@@ -48,30 +37,77 @@ struct tty_port_client_operations {
 
 extern const struct tty_port_client_operations tty_port_default_client_ops;
 
+/**
+ * struct tty_port -- port level information
+ *
+ * @buf: buffer for this port, locked internally
+ * @tty: back pointer to &struct tty_struct, valid only if the tty is open. Use
+ *	 tty_port_tty_get() to obtain it (and tty_kref_put() to release).
+ * @itty: internal back pointer to &struct tty_struct. Avoid this. It should be
+ *	  eliminated in the long term.
+ * @ops: tty port operations (like activate, shutdown), see &struct
+ *	 tty_port_operations
+ * @client_ops: tty port client operations (like receive_buf, write_wakeup).
+ *		By default, tty_port_default_client_ops is used.
+ * @lock: lock protecting @tty
+ * @blocked_open: # of procs waiting for open in tty_port_block_til_ready()
+ * @count: usage count
+ * @open_wait: open waiters queue (waiting e.g. for a carrier)
+ * @delta_msr_wait: modem status change queue (waiting for MSR changes)
+ * @flags: user TTY flags (%ASYNC_)
+ * @iflags: internal flags (%TTY_PORT_)
+ * @console: when set, the port is a console
+ * @mutex: locking, for open, shutdown and other port operations
+ * @buf_mutex: @xmit_buf alloc lock
+ * @xmit_buf: optional xmit buffer used by some drivers
+ * @close_delay: delay in jiffies to wait when closing the port
+ * @closing_wait: delay in jiffies for output to be sent before closing
+ * @drain_delay: set to zero if no pure time based drain is needed else set to
+ *		 size of fifo
+ * @kref: references counter. Reaching zero calls @ops->destruct() if non-%NULL
+ *	  or frees the port otherwise.
+ * @client_data: pointer to private data, for @client_ops
+ *
+ * Each device keeps its own port level information. &struct tty_port was
+ * introduced as a common structure for such information. As every TTY device
+ * shall have a backing tty_port structure, every driver can use these members.
+ *
+ * The tty port has a different lifetime to the tty so must be kept apart.
+ * In addition be careful as tty -> port mappings are valid for the life
+ * of the tty object but in many cases port -> tty mappings are valid only
+ * until a hangup so don't use the wrong path.
+ *
+ * Tty port shall be initialized by tty_port_init() and shut down either by
+ * tty_port_destroy() (refcounting not used), or tty_port_put() (refcounting).
+ *
+ * There is a lot of helpers around &struct tty_port too. To name the most
+ * significant ones: tty_port_open(), tty_port_close() (or
+ * tty_port_close_start() and tty_port_close_end() separately if need be), and
+ * tty_port_hangup(). These call @ops->activate() and @ops->shutdown() as
+ * needed.
+ */
 struct tty_port {
-	struct tty_bufhead	buf;		/* Locked internally */
-	struct tty_struct	*tty;		/* Back pointer */
-	struct tty_struct	*itty;		/* internal back ptr */
-	const struct tty_port_operations *ops;	/* Port operations */
-	const struct tty_port_client_operations *client_ops; /* Port client operations */
-	spinlock_t		lock;		/* Lock protecting tty field */
-	int			blocked_open;	/* Waiting to open */
-	int			count;		/* Usage count */
-	wait_queue_head_t	open_wait;	/* Open waiters */
-	wait_queue_head_t	delta_msr_wait;	/* Modem status change */
-	unsigned long		flags;		/* User TTY flags ASYNC_ */
-	unsigned long		iflags;		/* Internal flags TTY_PORT_ */
-	unsigned char		console:1;	/* port is a console */
-	struct mutex		mutex;		/* Locking */
-	struct mutex		buf_mutex;	/* Buffer alloc lock */
-	unsigned char		*xmit_buf;	/* Optional buffer */
-	unsigned int		close_delay;	/* Close port delay */
-	unsigned int		closing_wait;	/* Delay for output */
-	int			drain_delay;	/* Set to zero if no pure time
-						   based drain is needed else
-						   set to size of fifo */
-	struct kref		kref;		/* Ref counter */
-	void 			*client_data;
+	struct tty_bufhead	buf;
+	struct tty_struct	*tty;
+	struct tty_struct	*itty;
+	const struct tty_port_operations *ops;
+	const struct tty_port_client_operations *client_ops;
+	spinlock_t		lock;
+	int			blocked_open;
+	int			count;
+	wait_queue_head_t	open_wait;
+	wait_queue_head_t	delta_msr_wait;
+	unsigned long		flags;
+	unsigned long		iflags;
+	unsigned char		console:1;
+	struct mutex		mutex;
+	struct mutex		buf_mutex;
+	unsigned char		*xmit_buf;
+	unsigned int		close_delay;
+	unsigned int		closing_wait;
+	int			drain_delay;
+	struct kref		kref;
+	void			*client_data;
 };
 
 /* tty_port::iflags bits -- use atomic bit ops */
-- 
cgit v1.2.3


From a6563830215226aae0e7e6802955c77a6a7b7547 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Fri, 26 Nov 2021 09:15:51 +0100
Subject: tty: add kernel-doc for tty_driver

tty_driver used to have only short comments along its members. Convert
them into proper kernel-doc comments in front of the structure. And add
some more explanation to them where needed.

The whole structure handling is documented at the end too.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20211126081611.11001-4-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty_driver.h | 62 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 50 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 795b94ccdeb6..3622404a678d 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -291,23 +291,61 @@ struct tty_operations {
 	int (*proc_show)(struct seq_file *, void *);
 } __randomize_layout;
 
+/**
+ * struct tty_driver -- driver for TTY devices
+ *
+ * @magic: set to %TTY_DRIVER_MAGIC in __tty_alloc_driver()
+ * @kref: reference counting. Reaching zero frees all the internals and the
+ *	  driver.
+ * @cdevs: allocated/registered character /dev devices
+ * @owner: modules owning this driver. Used drivers cannot be rmmod'ed.
+ *	   Automatically set by tty_alloc_driver().
+ * @driver_name: name of the driver used in /proc/tty
+ * @name: used for constructing /dev node name
+ * @name_base: used as a number base for constructing /dev node name
+ * @major: major /dev device number (zero for autoassignment)
+ * @minor_start: the first minor /dev device number
+ * @num: number of devices allocated
+ * @type: type of tty driver (%TTY_DRIVER_TYPE_)
+ * @subtype: subtype of tty driver (%SYSTEM_TYPE_, %PTY_TYPE_, %SERIAL_TYPE_)
+ * @init_termios: termios to set to each tty initially (e.g. %tty_std_termios)
+ * @flags: tty driver flags (%TTY_DRIVER_)
+ * @proc_entry: proc fs entry, used internally
+ * @other: driver of the linked tty; only used for the PTY driver
+ * @ttys: array of active &struct tty_struct, set by tty_standard_install()
+ * @ports: array of &struct tty_port; can be set during initialization by
+ *	   tty_port_link_device() and similar
+ * @termios: storage for termios at each TTY close for the next open
+ * @driver_state: pointer to driver's arbitrary data
+ * @ops: driver hooks for TTYs. Set them using tty_set_operations(). Use &struct
+ *	 tty_port helpers in them as much as possible.
+ * @tty_drivers: used internally to link tty_drivers together
+ *
+ * The usual handling of &struct tty_driver is to allocate it by
+ * tty_alloc_driver(), set up all the necessary members, and register it by
+ * tty_register_driver(). At last, the driver is torn down by calling
+ * tty_unregister_driver() followed by tty_driver_kref_put().
+ *
+ * The fields required to be set before calling tty_register_driver() include
+ * @driver_name, @name, @type, @subtype, @init_termios, and @ops.
+ */
 struct tty_driver {
-	int	magic;		/* magic number for this structure */
-	struct kref kref;	/* Reference management */
+	int	magic;
+	struct kref kref;
 	struct cdev **cdevs;
 	struct module	*owner;
 	const char	*driver_name;
 	const char	*name;
-	int	name_base;	/* offset of printed name */
-	int	major;		/* major device number */
-	int	minor_start;	/* start of minor device number */
-	unsigned int	num;	/* number of devices allocated */
-	short	type;		/* type of tty driver */
-	short	subtype;	/* subtype of tty driver */
-	struct ktermios init_termios; /* Initial termios */
-	unsigned long	flags;		/* tty driver flags */
-	struct proc_dir_entry *proc_entry; /* /proc fs entry */
-	struct tty_driver *other; /* only used for the PTY driver */
+	int	name_base;
+	int	major;
+	int	minor_start;
+	unsigned int	num;
+	short	type;
+	short	subtype;
+	struct ktermios init_termios;
+	unsigned long	flags;
+	struct proc_dir_entry *proc_entry;
+	struct tty_driver *other;
 
 	/*
 	 * Pointer to the tty data structures
-- 
cgit v1.2.3


From 1fe183091753b1d7f11e70593700c0c0ef268db7 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Fri, 26 Nov 2021 09:15:52 +0100
Subject: tty: add kernel-doc for tty_operations

tty_operations structure was already documented in a standalone comment
in the header beginning.

Move it right before the structure and reformat it so it complies to
kernel-doc. That way, we can include it in Documentation/ later in this
series.

Note that we named proc_show's parameters, so that we can reference
them in the text.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20211126081611.11001-5-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty_driver.h | 398 +++++++++++++++++++++++++++------------------
 1 file changed, 241 insertions(+), 157 deletions(-)

(limited to 'include')

diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 3622404a678d..5611992ab26a 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -2,248 +2,332 @@
 #ifndef _LINUX_TTY_DRIVER_H
 #define _LINUX_TTY_DRIVER_H
 
-/*
- * This structure defines the interface between the low-level tty
- * driver and the tty routines.  The following routines can be
- * defined; unless noted otherwise, they are optional, and can be
- * filled in with a null pointer.
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/cdev.h>
+#include <linux/termios.h>
+#include <linux/seq_file.h>
+
+struct tty_struct;
+struct tty_driver;
+struct serial_icounter_struct;
+struct serial_struct;
+
+/**
+ * struct tty_operations -- interface between driver and tty
  *
- * struct tty_struct * (*lookup)(struct tty_driver *self, struct file *, int idx)
+ * @lookup: ``struct tty_struct *()(struct tty_driver *self, struct file *,
+ *				    int idx)``
  *
- *	Return the tty device corresponding to idx, NULL if there is not
- *	one currently in use and an ERR_PTR value on error. Called under
- *	tty_mutex (for now!)
+ *	Return the tty device corresponding to @idx, %NULL if there is not
+ *	one currently in use and an %ERR_PTR value on error. Called under
+ *	%tty_mutex (for now!)
  *
- *	Optional method. Default behaviour is to use the ttys array
+ *	Optional method. Default behaviour is to use the @self->ttys array.
  *
- * int (*install)(struct tty_driver *self, struct tty_struct *tty)
+ * @install: ``int ()(struct tty_driver *self, struct tty_struct *tty)``
  *
- *	Install a new tty into the tty driver internal tables. Used in
- *	conjunction with lookup and remove methods.
+ *	Install a new @tty into the @self's internal tables. Used in
+ *	conjunction with @lookup and @remove methods.
  *
- *	Optional method. Default behaviour is to use the ttys array
+ *	Optional method. Default behaviour is to use the @self->ttys array.
  *
- * void (*remove)(struct tty_driver *self, struct tty_struct *tty)
+ * @remove: ``void ()(struct tty_driver *self, struct tty_struct *tty)``
  *
- *	Remove a closed tty from the tty driver internal tables. Used in
- *	conjunction with lookup and remove methods.
+ *	Remove a closed @tty from the @self's internal tables. Used in
+ *	conjunction with @lookup and @remove methods.
  *
- *	Optional method. Default behaviour is to use the ttys array
+ *	Optional method. Default behaviour is to use the @self->ttys array.
  *
- * int  (*open)(struct tty_struct * tty, struct file * filp);
+ * @open: ``int ()(struct tty_struct *tty, struct file *)``
  *
- * 	This routine is called when a particular tty device is opened.
- * 	This routine is mandatory; if this routine is not filled in,
- * 	the attempted open will fail with ENODEV.
+ *	This routine is called when a particular @tty device is opened. This
+ *	routine is mandatory; if this routine is not filled in, the attempted
+ *	open will fail with %ENODEV.
  *
  *	Required method. Called with tty lock held.
  *
- * void (*close)(struct tty_struct * tty, struct file * filp);
+ * @close: ``void ()(struct tty_struct *tty, struct file *)``
  *
- * 	This routine is called when a particular tty device is closed.
- *	Note: called even if the corresponding open() failed.
+ *	This routine is called when a particular @tty device is closed.
+ *
+ *	Remark: called even if the corresponding @open() failed.
  *
  *	Required method. Called with tty lock held.
  *
- * void (*shutdown)(struct tty_struct * tty);
+ * @shutdown: ``void ()(struct tty_struct *tty)``
  *
- * 	This routine is called under the tty lock when a particular tty device
- *	is closed for the last time. It executes before the tty resources
- *	are freed so may execute while another function holds a tty kref.
+ *	This routine is called under the tty lock when a particular @tty device
+ *	is closed for the last time. It executes before the @tty resources
+ *	are freed so may execute while another function holds a @tty kref.
  *
- * void (*cleanup)(struct tty_struct * tty);
+ * @cleanup: ``void ()(struct tty_struct *tty)``
  *
- *	This routine is called asynchronously when a particular tty device
+ *	This routine is called asynchronously when a particular @tty device
  *	is closed for the last time freeing up the resources. This is
  *	actually the second part of shutdown for routines that might sleep.
  *
+ * @write: ``int ()(struct tty_struct *tty, const unsigned char *buf,
+ *		    int count)``
  *
- * int (*write)(struct tty_struct * tty,
- * 		 const unsigned char *buf, int count);
- *
- * 	This routine is called by the kernel to write a series of
- * 	characters to the tty device.  The characters may come from
- * 	user space or kernel space.  This routine will return the
+ *	This routine is called by the kernel to write a series (@count) of
+ *	characters (@buf) to the @tty device. The characters may come from
+ *	user space or kernel space.  This routine will return the
  *	number of characters actually accepted for writing.
  *
  *	Optional: Required for writable devices.
  *
- * int (*put_char)(struct tty_struct *tty, unsigned char ch);
+ * @put_char: ``int ()(struct tty_struct *tty, unsigned char ch)``
  *
- * 	This routine is called by the kernel to write a single
- * 	character to the tty device.  If the kernel uses this routine,
- * 	it must call the flush_chars() routine (if defined) when it is
- * 	done stuffing characters into the driver.  If there is no room
- * 	in the queue, the character is ignored.
+ *	This routine is called by the kernel to write a single character @ch to
+ *	the @tty device. If the kernel uses this routine, it must call the
+ *	@flush_chars() routine (if defined) when it is done stuffing characters
+ *	into the driver. If there is no room in the queue, the character is
+ *	ignored.
  *
- *	Optional: Kernel will use the write method if not provided.
+ *	Optional: Kernel will use the @write method if not provided. Do not
+ *	call this function directly, call tty_put_char().
  *
- *	Note: Do not call this function directly, call tty_put_char
+ * @flush_chars: ``void ()(struct tty_struct *tty)``
  *
- * void (*flush_chars)(struct tty_struct *tty);
+ *	This routine is called by the kernel after it has written a
+ *	series of characters to the tty device using @put_char().
  *
- * 	This routine is called by the kernel after it has written a
- * 	series of characters to the tty device using put_char().  
+ *	Optional. Do not call this function directly, call
+ *	tty_driver_flush_chars().
  *
- *	Optional:
+ * @write_room: ``unsigned int ()(struct tty_struct *tty)``
  *
- *	Note: Do not call this function directly, call tty_driver_flush_chars
- * 
- * unsigned int  (*write_room)(struct tty_struct *tty);
- *
- * 	This routine returns the numbers of characters the tty driver
- * 	will accept for queuing to be written.  This number is subject
- * 	to change as output buffers get emptied, or if the output flow
+ *	This routine returns the numbers of characters the @tty driver
+ *	will accept for queuing to be written.  This number is subject
+ *	to change as output buffers get emptied, or if the output flow
  *	control is acted.
  *
- *	Required if write method is provided else not needed.
+ *	Required if @write method is provided else not needed. Do not call this
+ *	function directly, call tty_write_room()
  *
- *	Note: Do not call this function directly, call tty_write_room
- * 
- * int  (*ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg);
+ * @chars_in_buffer: ``unsigned int ()(struct tty_struct *tty)``
  *
- * 	This routine allows the tty driver to implement
- *	device-specific ioctls.  If the ioctl number passed in cmd
- * 	is not recognized by the driver, it should return ENOIOCTLCMD.
+ *	This routine returns the number of characters in the device private
+ *	output queue. Used in tty_wait_until_sent() and for poll()
+ *	implementation.
  *
- *	Optional
+ *	Optional: if not provided, it is assumed there is no queue on the
+ *	device. Do not call this function directly, call tty_chars_in_buffer().
  *
- * long (*compat_ioctl)(struct tty_struct *tty,,
- * 	                unsigned int cmd, unsigned long arg);
+ * @ioctl: ``int ()(struct tty_struct *tty, unsigned int cmd,
+ *		    unsigned long arg)``
  *
- * 	implement ioctl processing for 32 bit process on 64 bit system
+ *	This routine allows the @tty driver to implement device-specific
+ *	ioctls. If the ioctl number passed in @cmd is not recognized by the
+ *	driver, it should return %ENOIOCTLCMD.
  *
- *	Optional
- * 
- * void (*set_termios)(struct tty_struct *tty, struct ktermios * old);
+ *	Optional.
  *
- * 	This routine allows the tty driver to be notified when
- * 	device's termios settings have changed.
+ * @compat_ioctl: ``long ()(struct tty_struct *tty, unsigned int cmd,
+ *			  unsigned long arg)``
  *
- *	Optional: Called under the termios lock
+ *	Implement ioctl processing for 32 bit process on 64 bit system.
  *
+ *	Optional.
  *
- * void (*set_ldisc)(struct tty_struct *tty);
+ * @set_termios: ``void ()(struct tty_struct *tty, struct ktermios *old)``
  *
- * 	This routine allows the tty driver to be notified when the
- * 	device's termios settings have changed.
+ *	This routine allows the @tty driver to be notified when device's
+ *	termios settings have changed.
  *
- *	Optional: Called under BKL (currently)
- * 
- * void (*throttle)(struct tty_struct * tty);
+ *	Optional: Called under the @tty->termios_rwsem.
  *
- * 	This routine notifies the tty driver that input buffers for
- * 	the line discipline are close to full, and it should somehow
- * 	signal that no more characters should be sent to the tty.
+ * @set_ldisc: ``void ()(struct tty_struct *tty)``
  *
- *	Optional: Always invoke via tty_throttle_safe(), called under the
- *	termios lock.
- * 
- * void (*unthrottle)(struct tty_struct * tty);
+ *	This routine allows the @tty driver to be notified when the device's
+ *	line discipline is being changed.
  *
- * 	This routine notifies the tty drivers that it should signals
- * 	that characters can now be sent to the tty without fear of
- * 	overrunning the input buffers of the line disciplines.
- * 
- *	Optional: Always invoke via tty_unthrottle(), called under the
- *	termios lock.
+ *	Optional. Called under the @tty->ldisc_sem and @tty->termios_rwsem.
  *
- * void (*stop)(struct tty_struct *tty);
+ * @throttle: ``void ()(struct tty_struct *tty)``
  *
- * 	This routine notifies the tty driver that it should stop
- * 	outputting characters to the tty device.  
+ *	This routine notifies the @tty driver that input buffers for the line
+ *	discipline are close to full, and it should somehow signal that no more
+ *	characters should be sent to the @tty.
  *
- *	Called with ->flow.lock held. Serialized with start() method.
+ *	Optional: Always invoke via tty_throttle_safe(). Called under the
+ *	@tty->termios_rwsem.
  *
- *	Optional:
+ * @unthrottle: ``void ()(struct tty_struct *tty)``
  *
- *	Note: Call stop_tty not this method.
- * 
- * void (*start)(struct tty_struct *tty);
+ *	This routine notifies the @tty driver that it should signal that
+ *	characters can now be sent to the @tty without fear of overrunning the
+ *	input buffers of the line disciplines.
  *
- * 	This routine notifies the tty driver that it resume sending
+ *	Optional. Always invoke via tty_unthrottle(). Called under the
+ *	@tty->termios_rwsem.
+ *
+ * @stop: ``void ()(struct tty_struct *tty)``
+ *
+ *	This routine notifies the @tty driver that it should stop outputting
  *	characters to the tty device.
  *
- *	Called with ->flow.lock held. Serialized with stop() method.
+ *	Called with @tty->flow.lock held. Serialized with @start() method.
  *
- *	Optional:
+ *	Optional. Always invoke via stop_tty().
  *
- *	Note: Call start_tty not this method.
- * 
- * void (*hangup)(struct tty_struct *tty);
+ * @start: ``void ()(struct tty_struct *tty)``
  *
- * 	This routine notifies the tty driver that it should hang up the
- * 	tty device.
+ *	This routine notifies the @tty driver that it resumed sending
+ *	characters to the @tty device.
  *
- *	Optional:
+ *	Called with @tty->flow.lock held. Serialized with stop() method.
  *
- *	Called with tty lock held.
+ *	Optional. Always invoke via start_tty().
  *
- * int (*break_ctl)(struct tty_struct *tty, int state);
+ * @hangup: ``void ()(struct tty_struct *tty)``
  *
- * 	This optional routine requests the tty driver to turn on or
- * 	off BREAK status on the RS-232 port.  If state is -1,
- * 	then the BREAK status should be turned on; if state is 0, then
- * 	BREAK should be turned off.
+ *	This routine notifies the @tty driver that it should hang up the @tty
+ *	device.
  *
- * 	If this routine is implemented, the high-level tty driver will
- * 	handle the following ioctls: TCSBRK, TCSBRKP, TIOCSBRK,
- * 	TIOCCBRK.
+ *	Optional. Called with tty lock held.
  *
- *	If the driver sets TTY_DRIVER_HARDWARE_BREAK then the interface
- *	will also be called with actual times and the hardware is expected
- *	to do the delay work itself. 0 and -1 are still used for on/off.
+ * @break_ctl: ``int ()(struct tty_struct *tty, int state)``
  *
- *	Optional: Required for TCSBRK/BRKP/etc handling.
+ *	This optional routine requests the @tty driver to turn on or off BREAK
+ *	status on the RS-232 port. If @state is -1, then the BREAK status
+ *	should be turned on; if @state is 0, then BREAK should be turned off.
  *
- * void (*wait_until_sent)(struct tty_struct *tty, int timeout);
- * 
- * 	This routine waits until the device has written out all of the
- * 	characters in its transmitter FIFO.
+ *	If this routine is implemented, the high-level tty driver will handle
+ *	the following ioctls: %TCSBRK, %TCSBRKP, %TIOCSBRK, %TIOCCBRK.
+ *
+ *	If the driver sets %TTY_DRIVER_HARDWARE_BREAK in tty_alloc_driver(),
+ *	then the interface will also be called with actual times and the
+ *	hardware is expected to do the delay work itself. 0 and -1 are still
+ *	used for on/off.
+ *
+ *	Optional: Required for %TCSBRK/%BRKP/etc. handling.
+ *
+ * @flush_buffer: ``void ()(struct tty_struct *tty)``
+ *
+ *	This routine discards device private output buffer. Invoked on close,
+ *	hangup, to implement %TCOFLUSH ioctl and similar.
+ *
+ *	Optional: if not provided, it is assumed there is no queue on the
+ *	device. Do not call this function directly, call
+ *	tty_driver_flush_buffer().
+ *
+ * @wait_until_sent: ``void ()(struct tty_struct *tty, int timeout)``
+ *
+ *	This routine waits until the device has written out all of the
+ *	characters in its transmitter FIFO. Or until @timeout (in jiffies) is
+ *	reached.
+ *
+ *	Optional: If not provided, the device is assumed to have no FIFO.
+ *	Usually correct to invoke via tty_wait_until_sent().
+ *
+ * @send_xchar: ``void ()(struct tty_struct *tty, char ch)``
+ *
+ *	This routine is used to send a high-priority XON/XOFF character (@ch)
+ *	to the @tty device.
  *
- *	Optional: If not provided the device is assumed to have no FIFO
+ *	Optional: If not provided, then the @write method is called under
+ *	the @tty->atomic_write_lock to keep it serialized with the ldisc.
  *
- *	Note: Usually correct to call tty_wait_until_sent
+ * @tiocmget: ``int ()(struct tty_struct *tty)``
  *
- * void (*send_xchar)(struct tty_struct *tty, char ch);
+ *	This routine is used to obtain the modem status bits from the @tty
+ *	driver.
  *
- * 	This routine is used to send a high-priority XON/XOFF
- * 	character to the device.
+ *	Optional: If not provided, then %ENOTTY is returned from the %TIOCMGET
+ *	ioctl. Do not call this function directly, call tty_tiocmget().
  *
- *	Optional: If not provided then the write method is called under
- *	the atomic write lock to keep it serialized with the ldisc.
+ * @tiocmset: ``int ()(struct tty_struct *tty,
+ *		       unsigned int set, unsigned int clear)``
  *
- * int (*resize)(struct tty_struct *tty, struct winsize *ws)
+ *	This routine is used to set the modem status bits to the @tty driver.
+ *	First, @clear bits should be cleared, then @set bits set.
  *
- *	Called when a termios request is issued which changes the
- *	requested terminal geometry.
+ *	Optional: If not provided, then %ENOTTY is returned from the %TIOCMSET
+ *	ioctl. Do not call this function directly, call tty_tiocmset().
+ *
+ * @resize: ``int ()(struct tty_struct *tty, struct winsize *ws)``
+ *
+ *	Called when a termios request is issued which changes the requested
+ *	terminal geometry to @ws.
  *
  *	Optional: the default action is to update the termios structure
  *	without error. This is usually the correct behaviour. Drivers should
- *	not force errors here if they are not resizable objects (eg a serial
+ *	not force errors here if they are not resizable objects (e.g. a serial
  *	line). See tty_do_resize() if you need to wrap the standard method
- *	in your own logic - the usual case.
+ *	in your own logic -- the usual case.
+ *
+ * @get_icount: ``int ()(struct tty_struct *tty,
+ *			 struct serial_icounter *icount)``
+ *
+ *	Called when the @tty device receives a %TIOCGICOUNT ioctl. Passed a
+ *	kernel structure @icount to complete.
+ *
+ *	Optional: called only if provided, otherwise %ENOTTY will be returned.
+ *
+ * @get_serial: ``int ()(struct tty_struct *tty, struct serial_struct *p)``
+ *
+ *	Called when the @tty device receives a %TIOCGSERIAL ioctl. Passed a
+ *	kernel structure @p (&struct serial_struct) to complete.
+ *
+ *	Optional: called only if provided, otherwise %ENOTTY will be returned.
+ *	Do not call this function directly, call tty_tiocgserial().
+ *
+ * @set_serial: ``int ()(struct tty_struct *tty, struct serial_struct *p)``
+ *
+ *	Called when the @tty device receives a %TIOCSSERIAL ioctl. Passed a
+ *	kernel structure @p (&struct serial_struct) to set the values from.
+ *
+ *	Optional: called only if provided, otherwise %ENOTTY will be returned.
+ *	Do not call this function directly, call tty_tiocsserial().
+ *
+ * @show_fdinfo: ``void ()(struct tty_struct *tty, struct seq_file *m)``
+ *
+ *	Called when the @tty device file descriptor receives a fdinfo request
+ *	from VFS (to show in /proc/<pid>/fdinfo/). @m should be filled with
+ *	information.
+ *
+ *	Optional: called only if provided, otherwise nothing is written to @m.
+ *	Do not call this function directly, call tty_show_fdinfo().
+ *
+ * @poll_init: ``int ()(struct tty_driver *driver, int line, char *options)``
+ *
+ *	kgdboc support (Documentation/dev-tools/kgdb.rst). This routine is
+ *	called to initialize the HW for later use by calling @poll_get_char or
+ *	@poll_put_char.
+ *
+ *	Optional: called only if provided, otherwise skipped as a non-polling
+ *	driver.
+ *
+ * @poll_get_char: ``int ()(struct tty_driver *driver, int line)``
+ *
+ *	kgdboc support (see @poll_init). @driver should read a character from a
+ *	tty identified by @line and return it.
+ *
+ *	Optional: called only if @poll_init provided.
  *
- * int (*get_icount)(struct tty_struct *tty, struct serial_icounter *icount);
+ * @poll_put_char: ``void ()(struct tty_driver *driver, int line, char ch)``
  *
- *	Called when the device receives a TIOCGICOUNT ioctl. Passed a kernel
- *	structure to complete. This method is optional and will only be called
- *	if provided (otherwise ENOTTY will be returned).
+ *	kgdboc support (see @poll_init). @driver should write character @ch to
+ *	a tty identified by @line.
+ *
+ *	Optional: called only if @poll_init provided.
+ *
+ * @proc_show: ``int ()(struct seq_file *m, void *driver)``
+ *
+ *	Driver @driver (cast to &struct tty_driver) can show additional info in
+ *	/proc/tty/driver/<driver_name>. It is enough to fill in the information
+ *	into @m.
+ *
+ *	Optional: called only if provided, otherwise no /proc entry created.
+ *
+ * This structure defines the interface between the low-level tty driver and
+ * the tty routines. These routines can be defined. Unless noted otherwise,
+ * they are optional, and can be filled in with a %NULL pointer.
  */
-
-#include <linux/export.h>
-#include <linux/fs.h>
-#include <linux/kref.h>
-#include <linux/list.h>
-#include <linux/cdev.h>
-#include <linux/termios.h>
-#include <linux/seq_file.h>
-
-struct tty_struct;
-struct tty_driver;
-struct serial_icounter_struct;
-struct serial_struct;
-
 struct tty_operations {
 	struct tty_struct * (*lookup)(struct tty_driver *driver,
 			struct file *filp, int idx);
@@ -288,7 +372,7 @@ struct tty_operations {
 	int (*poll_get_char)(struct tty_driver *driver, int line);
 	void (*poll_put_char)(struct tty_driver *driver, int line, char ch);
 #endif
-	int (*proc_show)(struct seq_file *, void *);
+	int (*proc_show)(struct seq_file *m, void *driver);
 } __randomize_layout;
 
 /**
-- 
cgit v1.2.3


From 630bf86d15778fd3e5df17cb6e00839d0f44a707 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Fri, 26 Nov 2021 09:15:53 +0100
Subject: tty: add kernel-doc for tty_port_operations

tty_port_operations used to have only comments along its members.
Convert them into proper kernel-doc comments in front of the structure.
And add some more explanation to them where needed.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20211126081611.11001-6-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty_port.h | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h
index 9091e1c8de4c..d3ea9ed0b98e 100644
--- a/include/linux/tty_port.h
+++ b/include/linux/tty_port.h
@@ -12,21 +12,28 @@ struct tty_driver;
 struct tty_port;
 struct tty_struct;
 
+/**
+ * struct tty_port_operations -- operations on tty_port
+ * @carrier_raised: return 1 if the carrier is raised on @port
+ * @dtr_rts: raise the DTR line if @raise is nonzero, otherwise lower DTR
+ * @shutdown: called when the last close completes or a hangup finishes IFF the
+ *	port was initialized. Do not use to free resources. Turn off the device
+ *	only. Called under the port mutex to serialize against @activate and
+ *	@shutdown.
+ * @activate: called under the port mutex from tty_port_open(), serialized using
+ *	the port mutex. Supposed to turn on the device.
+ *
+ *	FIXME: long term getting the tty argument *out* of this would be good
+ *	for consoles.
+ *
+ * @destruct: called on the final put of a port. Free resources, possibly incl.
+ *	the port itself.
+ */
 struct tty_port_operations {
-	/* Return 1 if the carrier is raised */
 	int (*carrier_raised)(struct tty_port *port);
-	/* Control the DTR line */
 	void (*dtr_rts)(struct tty_port *port, int raise);
-	/* Called when the last close completes or a hangup finishes
-	   IFF the port was initialized. Do not use to free resources. Called
-	   under the port mutex to serialize against activate/shutdowns */
 	void (*shutdown)(struct tty_port *port);
-	/* Called under the port mutex from tty_port_open, serialized using
-	   the port mutex */
-        /* FIXME: long term getting the tty argument *out* of this would be
-           good for consoles */
 	int (*activate)(struct tty_port *port, struct tty_struct *tty);
-	/* Called on the final put of a port */
 	void (*destruct)(struct tty_port *port);
 };
 
-- 
cgit v1.2.3


From 0c6119f9f7dc03a53bd35ca5a77926eef3c33d10 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Fri, 26 Nov 2021 09:15:54 +0100
Subject: tty: add kernel-doc for tty_ldisc_ops

tty_ldisc_ops structure was already partially documented in a standalone
comment in the header beginning.

Move it right before the structure and reformat it so it complies to
kernel-doc. That way, we can include it in Documentation/ later in this
series.

And add the documentation for the members where missing too.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20211126081611.11001-7-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty_ldisc.h | 259 +++++++++++++++++++++++-----------------------
 1 file changed, 130 insertions(+), 129 deletions(-)

(limited to 'include')

diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h
index 25f07017bbad..e0da0ba02de9 100644
--- a/include/linux/tty_ldisc.h
+++ b/include/linux/tty_ldisc.h
@@ -4,126 +4,6 @@
 
 struct tty_struct;
 
-/*
- * This structure defines the interface between the tty line discipline
- * implementation and the tty routines.  The following routines can be
- * defined; unless noted otherwise, they are optional, and can be
- * filled in with a null pointer.
- *
- * int	(*open)(struct tty_struct *);
- *
- *	This function is called when the line discipline is associated
- *	with the tty.  The line discipline can use this as an
- *	opportunity to initialize any state needed by the ldisc routines.
- *
- * void	(*close)(struct tty_struct *);
- *
- *	This function is called when the line discipline is being
- *	shutdown, either because the tty is being closed or because
- *	the tty is being changed to use a new line discipline
- *
- * void	(*flush_buffer)(struct tty_struct *tty);
- *
- *	This function instructs the line discipline to clear its
- *	buffers of any input characters it may have queued to be
- *	delivered to the user mode process.
- *
- * ssize_t (*read)(struct tty_struct * tty, struct file * file,
- *		   unsigned char * buf, size_t nr);
- *
- *	This function is called when the user requests to read from
- *	the tty.  The line discipline will return whatever characters
- *	it has buffered up for the user.  If this function is not
- *	defined, the user will receive an EIO error.
- *
- * ssize_t (*write)(struct tty_struct * tty, struct file * file,
- *		    const unsigned char * buf, size_t nr);
- *
- *	This function is called when the user requests to write to the
- *	tty.  The line discipline will deliver the characters to the
- *	low-level tty device for transmission, optionally performing
- *	some processing on the characters first.  If this function is
- *	not defined, the user will receive an EIO error.
- *
- * int	(*ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg);
- *
- *	This function is called when the user requests an ioctl which
- *	is not handled by the tty layer or the low-level tty driver.
- *	It is intended for ioctls which affect line discpline
- *	operation.  Note that the search order for ioctls is (1) tty
- *	layer, (2) tty low-level driver, (3) line discpline.  So a
- *	low-level driver can "grab" an ioctl request before the line
- *	discpline has a chance to see it.
- *
- * int	(*compat_ioctl)(struct tty_struct *tty, unsigned int cmd,
- *			unsigned long arg);
- *
- *	Process ioctl calls from 32-bit process on 64-bit system
- *
- *	NOTE: only ioctls that are neither "pointer to compatible
- *	structure" nor tty-generic.  Something private that takes
- *	an integer or a pointer to wordsize-sensitive structure
- *	belongs here, but most of ldiscs will happily leave
- *	it NULL.
- *
- * void	(*set_termios)(struct tty_struct *tty, struct ktermios * old);
- *
- *	This function notifies the line discpline that a change has
- *	been made to the termios structure.
- *
- * int	(*poll)(struct tty_struct * tty, struct file * file,
- *		  poll_table *wait);
- *
- *	This function is called when a user attempts to select/poll on a
- *	tty device.  It is solely the responsibility of the line
- *	discipline to handle poll requests.
- *
- * void	(*receive_buf)(struct tty_struct *, const unsigned char *cp,
- *		       char *fp, int count);
- *
- *	This function is called by the low-level tty driver to send
- *	characters received by the hardware to the line discpline for
- *	processing.  <cp> is a pointer to the buffer of input
- *	character received by the device.  <fp> is a pointer to a
- *	pointer of flag bytes which indicate whether a character was
- *	received with a parity error, etc. <fp> may be NULL to indicate
- *	all data received is TTY_NORMAL.
- *
- * void	(*write_wakeup)(struct tty_struct *);
- *
- *	This function is called by the low-level tty driver to signal
- *	that line discpline should try to send more characters to the
- *	low-level driver for transmission.  If the line discpline does
- *	not have any more data to send, it can just return. If the line
- *	discipline does have some data to send, please arise a tasklet
- *	or workqueue to do the real data transfer. Do not send data in
- *	this hook, it may leads to a deadlock.
- *
- * int (*hangup)(struct tty_struct *)
- *
- *	Called on a hangup. Tells the discipline that it should
- *	cease I/O to the tty driver. Can sleep. The driver should
- *	seek to perform this action quickly but should wait until
- *	any pending driver I/O is completed.
- *
- * void (*dcd_change)(struct tty_struct *tty, unsigned int status)
- *
- *	Tells the discipline that the DCD pin has changed its status.
- *	Used exclusively by the N_PPS (Pulse-Per-Second) line discipline.
- *
- * int	(*receive_buf2)(struct tty_struct *, const unsigned char *cp,
- *			char *fp, int count);
- *
- *	This function is called by the low-level tty driver to send
- *	characters received by the hardware to the line discpline for
- *	processing.  <cp> is a pointer to the buffer of input
- *	character received by the device.  <fp> is a pointer to a
- *	pointer of flag bytes which indicate whether a character was
- *	received with a parity error, etc. <fp> may be NULL to indicate
- *	all data received is TTY_NORMAL.
- *	If assigned, prefer this function for automatic flow control.
- */
-
 #include <linux/fs.h>
 #include <linux/wait.h>
 #include <linux/atomic.h>
@@ -175,7 +55,128 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
 		ldsem_down_write(sem, timeout)
 #endif
 
-
+/**
+ * struct tty_ldisc_ops - ldisc operations
+ *
+ * @name: name of this ldisc rendered in /proc/tty/ldiscs
+ * @num: ``N_*`` number (%N_TTY, %N_HDLC, ...) reserved to this ldisc
+ *
+ * @open: ``int ()(struct tty_struct *tty)``
+ *
+ *	This function is called when the line discipline is associated with the
+ *	@tty. The line discipline can use this as an opportunity to initialize
+ *	any state needed by the ldisc routines.
+ *
+ * @close: ``void ()(struct tty_struct *tty)``
+ *
+ *	This function is called when the line discipline is being shutdown,
+ *	either because the @tty is being closed or because the @tty is being
+ *	changed to use a new line discipline
+ *
+ * @flush_buffer: ``void ()(struct tty_struct *tty)``
+ *
+ *	This function instructs the line discipline to clear its buffers of any
+ *	input characters it may have queued to be delivered to the user mode
+ *	process.
+ *
+ * @read: ``ssize_t ()(struct tty_struct *tty, struct file *file,
+ *		unsigned char *buf, size_t nr)``
+ *
+ *	This function is called when the user requests to read from the @tty.
+ *	The line discipline will return whatever characters it has buffered up
+ *	for the user. If this function is not defined, the user will receive
+ *	an %EIO error.
+ *
+ * @write: ``ssize_t ()(struct tty_struct *tty, struct file *file,
+ *		const unsigned char *buf, size_t nr)``
+ *
+ *	This function is called when the user requests to write to the @tty.
+ *	The line discipline will deliver the characters to the low-level tty
+ *	device for transmission, optionally performing some processing on the
+ *	characters first. If this function is not defined, the user will
+ *	receive an %EIO error.
+ *
+ * @ioctl: ``int ()(struct tty_struct *tty, unsigned int cmd,
+ *		unsigned long arg)``
+ *
+ *	This function is called when the user requests an ioctl which is not
+ *	handled by the tty layer or the low-level tty driver. It is intended
+ *	for ioctls which affect line discpline operation.  Note that the search
+ *	order for ioctls is (1) tty layer, (2) tty low-level driver, (3) line
+ *	discpline. So a low-level driver can "grab" an ioctl request before
+ *	the line discpline has a chance to see it.
+ *
+ * @compat_ioctl: ``int ()(struct tty_struct *tty, unsigned int cmd,
+ *		unsigned long arg)``
+ *
+ *	Process ioctl calls from 32-bit process on 64-bit system.
+ *
+ *	Note that only ioctls that are neither "pointer to compatible
+ *	structure" nor tty-generic.  Something private that takes an integer or
+ *	a pointer to wordsize-sensitive structure belongs here, but most of
+ *	ldiscs will happily leave it %NULL.
+ *
+ * @set_termios: ``void ()(struct tty_struct *tty, struct ktermios *old)``
+ *
+ *	This function notifies the line discpline that a change has been made
+ *	to the termios structure.
+ *
+ * @poll: ``int ()(struct tty_struct *tty, struct file *file,
+ *		  struct poll_table_struct *wait)``
+ *
+ *	This function is called when a user attempts to select/poll on a @tty
+ *	device. It is solely the responsibility of the line discipline to
+ *	handle poll requests.
+ *
+ * @hangup: ``void ()(struct tty_struct *tty)``
+ *
+ *	Called on a hangup. Tells the discipline that it should cease I/O to
+ *	the tty driver. Can sleep. The driver should seek to perform this
+ *	action quickly but should wait until any pending driver I/O is
+ *	completed.
+ *
+ * @receive_buf: ``void ()(struct tty_struct *tty, const unsigned char *cp,
+ *		       const char *fp, int count)``
+ *
+ *	This function is called by the low-level tty driver to send characters
+ *	received by the hardware to the line discpline for processing. @cp is
+ *	a pointer to the buffer of input character received by the device. @fp
+ *	is a pointer to an array of flag bytes which indicate whether a
+ *	character was received with a parity error, etc. @fp may be %NULL to
+ *	indicate all data received is %TTY_NORMAL.
+ *
+ * @write_wakeup: ``void ()(struct tty_struct *tty)``
+ *
+ *	This function is called by the low-level tty driver to signal that line
+ *	discpline should try to send more characters to the low-level driver
+ *	for transmission. If the line discpline does not have any more data to
+ *	send, it can just return. If the line discipline does have some data to
+ *	send, please arise a tasklet or workqueue to do the real data transfer.
+ *	Do not send data in this hook, it may lead to a deadlock.
+ *
+ * @dcd_change: ``void ()(struct tty_struct *tty, unsigned int status)``
+ *
+ *	Tells the discipline that the DCD pin has changed its status. Used
+ *	exclusively by the %N_PPS (Pulse-Per-Second) line discipline.
+ *
+ * @receive_buf2: ``int ()(struct tty_struct *tty, const unsigned char *cp,
+ *			const char *fp, int count)``
+ *
+ *	This function is called by the low-level tty driver to send characters
+ *	received by the hardware to the line discpline for processing. @cp is a
+ *	pointer to the buffer of input character received by the device.  @fp
+ *	is a pointer to an array of flag bytes which indicate whether a
+ *	character was received with a parity error, etc. @fp may be %NULL to
+ *	indicate all data received is %TTY_NORMAL. If assigned, prefer this
+ *	function for automatic flow control.
+ *
+ * @owner: module containting this ldisc (for reference counting)
+ *
+ * This structure defines the interface between the tty line discipline
+ * implementation and the tty routines. The above routines can be defined.
+ * Unless noted otherwise, they are optional, and can be filled in with a %NULL
+ * pointer.
+ */
 struct tty_ldisc_ops {
 	char	*name;
 	int	num;
@@ -183,8 +184,8 @@ struct tty_ldisc_ops {
 	/*
 	 * The following routines are called from above.
 	 */
-	int	(*open)(struct tty_struct *);
-	void	(*close)(struct tty_struct *);
+	int	(*open)(struct tty_struct *tty);
+	void	(*close)(struct tty_struct *tty);
 	void	(*flush_buffer)(struct tty_struct *tty);
 	ssize_t	(*read)(struct tty_struct *tty, struct file *file,
 			unsigned char *buf, size_t nr,
@@ -196,18 +197,18 @@ struct tty_ldisc_ops {
 	int	(*compat_ioctl)(struct tty_struct *tty, unsigned int cmd,
 			unsigned long arg);
 	void	(*set_termios)(struct tty_struct *tty, struct ktermios *old);
-	__poll_t (*poll)(struct tty_struct *, struct file *,
-			     struct poll_table_struct *);
+	__poll_t (*poll)(struct tty_struct *tty, struct file *file,
+			     struct poll_table_struct *wait);
 	void	(*hangup)(struct tty_struct *tty);
 
 	/*
 	 * The following routines are called from below.
 	 */
-	void	(*receive_buf)(struct tty_struct *, const unsigned char *cp,
+	void	(*receive_buf)(struct tty_struct *tty, const unsigned char *cp,
 			       const char *fp, int count);
-	void	(*write_wakeup)(struct tty_struct *);
-	void	(*dcd_change)(struct tty_struct *, unsigned int);
-	int	(*receive_buf2)(struct tty_struct *, const unsigned char *cp,
+	void	(*write_wakeup)(struct tty_struct *tty);
+	void	(*dcd_change)(struct tty_struct *tty, unsigned int status);
+	int	(*receive_buf2)(struct tty_struct *tty, const unsigned char *cp,
 				const char *fp, int count);
 
 	struct  module *owner;
-- 
cgit v1.2.3


From 29d5ef685948369602ccd5c04d2a215449c4b943 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Fri, 26 Nov 2021 09:15:55 +0100
Subject: tty: combine tty_operations triple docs into kernel-doc

In Documentation/driver-api/serial/tty.rst, there are triplicated texts
about some struct tty_operations' hooks. Combine them into existing
kernel-doc comments of struct tty_operations and drop them from the
Documentation/.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20211126081611.11001-8-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty_driver.h | 36 +++++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 5611992ab26a..41274d551e28 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -47,15 +47,17 @@ struct serial_struct;
  *	routine is mandatory; if this routine is not filled in, the attempted
  *	open will fail with %ENODEV.
  *
- *	Required method. Called with tty lock held.
+ *	Required method. Called with tty lock held. May sleep.
  *
  * @close: ``void ()(struct tty_struct *tty, struct file *)``
  *
- *	This routine is called when a particular @tty device is closed.
+ *	This routine is called when a particular @tty device is closed. At the
+ *	point of return from this call the driver must make no further ldisc
+ *	calls of any kind.
  *
  *	Remark: called even if the corresponding @open() failed.
  *
- *	Required method. Called with tty lock held.
+ *	Required method. Called with tty lock held. May sleep.
  *
  * @shutdown: ``void ()(struct tty_struct *tty)``
  *
@@ -77,7 +79,10 @@ struct serial_struct;
  *	user space or kernel space.  This routine will return the
  *	number of characters actually accepted for writing.
  *
- *	Optional: Required for writable devices.
+ *	May occur in parallel in special cases. Because this includes panic
+ *	paths drivers generally shouldn't try and do clever locking here.
+ *
+ *	Optional: Required for writable devices. May not sleep.
  *
  * @put_char: ``int ()(struct tty_struct *tty, unsigned char ch)``
  *
@@ -105,6 +110,9 @@ struct serial_struct;
  *	to change as output buffers get emptied, or if the output flow
  *	control is acted.
  *
+ *	The ldisc is responsible for being intelligent about multi-threading of
+ *	write_room/write calls
+ *
  *	Required if @write method is provided else not needed. Do not call this
  *	function directly, call tty_write_room()
  *
@@ -136,14 +144,21 @@ struct serial_struct;
  * @set_termios: ``void ()(struct tty_struct *tty, struct ktermios *old)``
  *
  *	This routine allows the @tty driver to be notified when device's
- *	termios settings have changed.
+ *	termios settings have changed. New settings are in @tty->termios.
+ *	Previous settings are passed in the @old argument.
  *
- *	Optional: Called under the @tty->termios_rwsem.
+ *	The API is defined such that the driver should return the actual modes
+ *	selected. This means that the driver is responsible for modifying any
+ *	bits in @tty->termios it cannot fulfill to indicate the actual modes
+ *	being used.
+ *
+ *	Optional. Called under the @tty->termios_rwsem. May sleep.
  *
  * @set_ldisc: ``void ()(struct tty_struct *tty)``
  *
  *	This routine allows the @tty driver to be notified when the device's
- *	line discipline is being changed.
+ *	line discipline is being changed. At the point this is done the
+ *	discipline is not yet usable.
  *
  *	Optional. Called under the @tty->ldisc_sem and @tty->termios_rwsem.
  *
@@ -153,6 +168,9 @@ struct serial_struct;
  *	discipline are close to full, and it should somehow signal that no more
  *	characters should be sent to the @tty.
  *
+ *	Serialization including with @unthrottle() is the job of the ldisc
+ *	layer.
+ *
  *	Optional: Always invoke via tty_throttle_safe(). Called under the
  *	@tty->termios_rwsem.
  *
@@ -204,7 +222,7 @@ struct serial_struct;
  *	hardware is expected to do the delay work itself. 0 and -1 are still
  *	used for on/off.
  *
- *	Optional: Required for %TCSBRK/%BRKP/etc. handling.
+ *	Optional: Required for %TCSBRK/%BRKP/etc. handling. May sleep.
  *
  * @flush_buffer: ``void ()(struct tty_struct *tty)``
  *
@@ -222,7 +240,7 @@ struct serial_struct;
  *	reached.
  *
  *	Optional: If not provided, the device is assumed to have no FIFO.
- *	Usually correct to invoke via tty_wait_until_sent().
+ *	Usually correct to invoke via tty_wait_until_sent(). May sleep.
  *
  * @send_xchar: ``void ()(struct tty_struct *tty, char ch)``
  *
-- 
cgit v1.2.3


From 40f4268cddb93d17a11579920d940c2dca8b9445 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Fri, 26 Nov 2021 09:15:56 +0100
Subject: tty: combine tty_ldisc_ops docs into kernel-doc

In Documentation/driver-api/serial/tty.rst, there are duplicated texts
about some struct tty_ldisc_ops' hooks. Combine them into existing
kernel-doc comments of struct tty_ldisc_ops and drop them from the
Documentation/.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20211126081611.11001-9-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty_ldisc.h | 67 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h
index e0da0ba02de9..e85002b56752 100644
--- a/include/linux/tty_ldisc.h
+++ b/include/linux/tty_ldisc.h
@@ -61,33 +61,45 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
  * @name: name of this ldisc rendered in /proc/tty/ldiscs
  * @num: ``N_*`` number (%N_TTY, %N_HDLC, ...) reserved to this ldisc
  *
- * @open: ``int ()(struct tty_struct *tty)``
+ * @open: [TTY] ``int ()(struct tty_struct *tty)``
  *
  *	This function is called when the line discipline is associated with the
- *	@tty. The line discipline can use this as an opportunity to initialize
- *	any state needed by the ldisc routines.
+ *	@tty. No other call into the line discipline for this tty will occur
+ *	until it completes successfully. It should initialize any state needed
+ *	by the ldisc, and set @tty->receive_room to the maximum amount of data
+ *	the line discipline is willing to accept from the driver with a single
+ *	call to @receive_buf(). Returning an error will prevent the ldisc from
+ *	being attached.
  *
- * @close: ``void ()(struct tty_struct *tty)``
+ *	Can sleep.
+ *
+ * @close: [TTY] ``void ()(struct tty_struct *tty)``
  *
  *	This function is called when the line discipline is being shutdown,
  *	either because the @tty is being closed or because the @tty is being
- *	changed to use a new line discipline
+ *	changed to use a new line discipline. At the point of execution no
+ *	further users will enter the ldisc code for this tty.
+ *
+ *	Can sleep.
  *
- * @flush_buffer: ``void ()(struct tty_struct *tty)``
+ * @flush_buffer: [TTY] ``void ()(struct tty_struct *tty)``
  *
  *	This function instructs the line discipline to clear its buffers of any
  *	input characters it may have queued to be delivered to the user mode
- *	process.
+ *	process. It may be called at any point between open and close.
  *
- * @read: ``ssize_t ()(struct tty_struct *tty, struct file *file,
+ * @read: [TTY] ``ssize_t ()(struct tty_struct *tty, struct file *file,
  *		unsigned char *buf, size_t nr)``
  *
  *	This function is called when the user requests to read from the @tty.
  *	The line discipline will return whatever characters it has buffered up
  *	for the user. If this function is not defined, the user will receive
- *	an %EIO error.
+ *	an %EIO error. Multiple read calls may occur in parallel and the ldisc
+ *	must deal with serialization issues.
+ *
+ *	Can sleep.
  *
- * @write: ``ssize_t ()(struct tty_struct *tty, struct file *file,
+ * @write: [TTY] ``ssize_t ()(struct tty_struct *tty, struct file *file,
  *		const unsigned char *buf, size_t nr)``
  *
  *	This function is called when the user requests to write to the @tty.
@@ -96,7 +108,9 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
  *	characters first. If this function is not defined, the user will
  *	receive an %EIO error.
  *
- * @ioctl: ``int ()(struct tty_struct *tty, unsigned int cmd,
+ *	Can sleep.
+ *
+ * @ioctl: [TTY] ``int ()(struct tty_struct *tty, unsigned int cmd,
  *		unsigned long arg)``
  *
  *	This function is called when the user requests an ioctl which is not
@@ -106,7 +120,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
  *	discpline. So a low-level driver can "grab" an ioctl request before
  *	the line discpline has a chance to see it.
  *
- * @compat_ioctl: ``int ()(struct tty_struct *tty, unsigned int cmd,
+ * @compat_ioctl: [TTY] ``int ()(struct tty_struct *tty, unsigned int cmd,
  *		unsigned long arg)``
  *
  *	Process ioctl calls from 32-bit process on 64-bit system.
@@ -116,27 +130,29 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
  *	a pointer to wordsize-sensitive structure belongs here, but most of
  *	ldiscs will happily leave it %NULL.
  *
- * @set_termios: ``void ()(struct tty_struct *tty, struct ktermios *old)``
+ * @set_termios: [TTY] ``void ()(struct tty_struct *tty, struct ktermios *old)``
  *
  *	This function notifies the line discpline that a change has been made
  *	to the termios structure.
  *
- * @poll: ``int ()(struct tty_struct *tty, struct file *file,
+ * @poll: [TTY] ``int ()(struct tty_struct *tty, struct file *file,
  *		  struct poll_table_struct *wait)``
  *
  *	This function is called when a user attempts to select/poll on a @tty
  *	device. It is solely the responsibility of the line discipline to
  *	handle poll requests.
  *
- * @hangup: ``void ()(struct tty_struct *tty)``
+ * @hangup: [TTY] ``void ()(struct tty_struct *tty)``
  *
  *	Called on a hangup. Tells the discipline that it should cease I/O to
- *	the tty driver. Can sleep. The driver should seek to perform this
- *	action quickly but should wait until any pending driver I/O is
- *	completed.
+ *	the tty driver. The driver should seek to perform this action quickly
+ *	but should wait until any pending driver I/O is completed. No further
+ *	calls into the ldisc code will occur.
+ *
+ *	Can sleep.
  *
- * @receive_buf: ``void ()(struct tty_struct *tty, const unsigned char *cp,
- *		       const char *fp, int count)``
+ * @receive_buf: [DRV] ``void ()(struct tty_struct *tty,
+ *		       const unsigned char *cp, const char *fp, int count)``
  *
  *	This function is called by the low-level tty driver to send characters
  *	received by the hardware to the line discpline for processing. @cp is
@@ -145,7 +161,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
  *	character was received with a parity error, etc. @fp may be %NULL to
  *	indicate all data received is %TTY_NORMAL.
  *
- * @write_wakeup: ``void ()(struct tty_struct *tty)``
+ * @write_wakeup: [DRV] ``void ()(struct tty_struct *tty)``
  *
  *	This function is called by the low-level tty driver to signal that line
  *	discpline should try to send more characters to the low-level driver
@@ -154,13 +170,13 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
  *	send, please arise a tasklet or workqueue to do the real data transfer.
  *	Do not send data in this hook, it may lead to a deadlock.
  *
- * @dcd_change: ``void ()(struct tty_struct *tty, unsigned int status)``
+ * @dcd_change: [DRV] ``void ()(struct tty_struct *tty, unsigned int status)``
  *
  *	Tells the discipline that the DCD pin has changed its status. Used
  *	exclusively by the %N_PPS (Pulse-Per-Second) line discipline.
  *
- * @receive_buf2: ``int ()(struct tty_struct *tty, const unsigned char *cp,
- *			const char *fp, int count)``
+ * @receive_buf2: [DRV] ``int ()(struct tty_struct *tty,
+ *			const unsigned char *cp, const char *fp, int count)``
  *
  *	This function is called by the low-level tty driver to send characters
  *	received by the hardware to the line discpline for processing. @cp is a
@@ -176,6 +192,9 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
  * implementation and the tty routines. The above routines can be defined.
  * Unless noted otherwise, they are optional, and can be filled in with a %NULL
  * pointer.
+ *
+ * Hooks marked [TTY] are invoked from the TTY core, the [DRV] ones from the
+ * tty_driver side.
  */
 struct tty_ldisc_ops {
 	char	*name;
-- 
cgit v1.2.3


From 4072254f96f954ec0d34899f15d987803b6d76a2 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Fri, 26 Nov 2021 09:15:57 +0100
Subject: tty: reformat tty_struct::flags into kernel-doc

Move the partial tty_struct::flags documentation from tty_ldisc to the
tty.h header and combine it with the one-liners present there. Convert
all those to kernel-doc. This way, we can simply reference the
documentation in Documentation while the text is still along the
definitions.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20211126081611.11001-10-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty.h | 74 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 60 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index da49ad9be281..7b0a5d478ef6 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -263,26 +263,72 @@ struct tty_file_private {
 /* tty magic number */
 #define TTY_MAGIC		0x5401
 
-/*
- * These bits are used in the flags field of the tty structure.
+/**
+ * DOC: TTY Struct Flags
+ *
+ * These bits are used in the :c:member:`tty_struct.flags` field.
  *
  * So that interrupts won't be able to mess up the queues,
  * copy_to_cooked must be atomic with respect to itself, as must
  * tty->write.  Thus, you must use the inline functions set_bit() and
  * clear_bit() to make things atomic.
+ *
+ * TTY_THROTTLED
+ *	Driver input is throttled. The ldisc should call
+ *	:c:member:`tty_driver.unthrottle()` in order to resume reception when
+ *	it is ready to process more data (at threshold min).
+ *
+ * TTY_IO_ERROR
+ *	If set, causes all subsequent userspace read/write calls on the tty to
+ *	fail, returning -%EIO. (May be no ldisc too.)
+ *
+ * TTY_OTHER_CLOSED
+ *	Device is a pty and the other side has closed.
+ *
+ * TTY_EXCLUSIVE
+ *	Exclusive open mode (a single opener).
+ *
+ * TTY_DO_WRITE_WAKEUP
+ *	If set, causes the driver to call the
+ *	:c:member:`tty_ldisc_ops.write_wakeup()` method in order to resume
+ *	transmission when it can accept more data to transmit.
+ *
+ * TTY_LDISC_OPEN
+ *	Indicates that a line discipline is open. For debugging purposes only.
+ *
+ * TTY_PTY_LOCK
+ *	A flag private to pty code to implement %TIOCSPTLCK/%TIOCGPTLCK logic.
+ *
+ * TTY_NO_WRITE_SPLIT
+ *	Prevent driver from splitting up writes into smaller chunks (preserve
+ *	write boundaries to driver).
+ *
+ * TTY_HUPPED
+ *	The TTY was hung up. This is set post :c:member:`tty_driver.hangup()`.
+ *
+ * TTY_HUPPING
+ *	The TTY is in the process of hanging up to abort potential readers.
+ *
+ * TTY_LDISC_CHANGING
+ *	Line discipline for this TTY is being changed. I/O should not block
+ *	when this is set. Use tty_io_nonblock() to check.
+ *
+ * TTY_LDISC_HALTED
+ *	Line discipline for this TTY was stopped. No work should be queued to
+ *	this ldisc.
  */
-#define TTY_THROTTLED 		0	/* Call unthrottle() at threshold min */
-#define TTY_IO_ERROR 		1	/* Cause an I/O error (may be no ldisc too) */
-#define TTY_OTHER_CLOSED 	2	/* Other side (if any) has closed */
-#define TTY_EXCLUSIVE 		3	/* Exclusive open mode */
-#define TTY_DO_WRITE_WAKEUP 	5	/* Call write_wakeup after queuing new */
-#define TTY_LDISC_OPEN	 	11	/* Line discipline is open */
-#define TTY_PTY_LOCK 		16	/* pty private */
-#define TTY_NO_WRITE_SPLIT 	17	/* Preserve write boundaries to driver */
-#define TTY_HUPPED 		18	/* Post driver->hangup() */
-#define TTY_HUPPING		19	/* Hangup in progress */
-#define TTY_LDISC_CHANGING	20	/* Change pending - non-block IO */
-#define TTY_LDISC_HALTED	22	/* Line discipline is halted */
+#define TTY_THROTTLED		0
+#define TTY_IO_ERROR		1
+#define TTY_OTHER_CLOSED	2
+#define TTY_EXCLUSIVE		3
+#define TTY_DO_WRITE_WAKEUP	5
+#define TTY_LDISC_OPEN		11
+#define TTY_PTY_LOCK		16
+#define TTY_NO_WRITE_SPLIT	17
+#define TTY_HUPPED		18
+#define TTY_HUPPING		19
+#define TTY_LDISC_CHANGING	20
+#define TTY_LDISC_HALTED	22
 
 static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file)
 {
-- 
cgit v1.2.3


From 34d809f8b4ff68f63e8d7f71d93d150382c6bb8b Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Fri, 26 Nov 2021 09:15:58 +0100
Subject: tty: reformat TTY_DRIVER_ flags into kernel-doc

We want to reference TTY_DRIVER_* flags in Documentation/ later in this
series. But the current documentation in the TTY_DRIVER_*'s header does
not allow that. Reformat it to kernel-doc using "DOC" directive and
line-feeds, so that we can include it as it is.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20211126081611.11001-11-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty_driver.h | 90 ++++++++++++++++++++++++----------------------
 1 file changed, 47 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 41274d551e28..4841d8069c07 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -492,49 +492,53 @@ static inline void tty_set_operations(struct tty_driver *driver,
 /* tty driver magic number */
 #define TTY_DRIVER_MAGIC		0x5402
 
-/*
- * tty driver flags
- * 
- * TTY_DRIVER_RESET_TERMIOS --- requests the tty layer to reset the
- * 	termios setting when the last process has closed the device.
- * 	Used for PTY's, in particular.
- * 
- * TTY_DRIVER_REAL_RAW --- if set, indicates that the driver will
- * 	guarantee never to set any special character handling
- * 	flags if ((IGNBRK || (!BRKINT && !PARMRK)) && (IGNPAR ||
- * 	!INPCK)).  That is, if there is no reason for the driver to
- * 	send notifications of parity and break characters up to the
- * 	line driver, it won't do so.  This allows the line driver to
- *	optimize for this case if this flag is set.  (Note that there
- * 	is also a promise, if the above case is true, not to signal
- * 	overruns, either.)
- *
- * TTY_DRIVER_DYNAMIC_DEV --- if set, the individual tty devices need
- *	to be registered with a call to tty_register_device() when the
- *	device is found in the system and unregistered with a call to
- *	tty_unregister_device() so the devices will be show up
- *	properly in sysfs.  If not set, driver->num entries will be
- *	created by the tty core in sysfs when tty_register_driver() is
- *	called.  This is to be used by drivers that have tty devices
- *	that can appear and disappear while the main tty driver is
- *	registered with the tty core.
- *
- * TTY_DRIVER_DEVPTS_MEM -- don't use the standard arrays, instead
- *	use dynamic memory keyed through the devpts filesystem.  This
- *	is only applicable to the pty driver.
- *
- * TTY_DRIVER_HARDWARE_BREAK -- hardware handles break signals. Pass
- *	the requested timeout to the caller instead of using a simple
- *	on/off interface.
- *
- * TTY_DRIVER_DYNAMIC_ALLOC -- do not allocate structures which are
- *	needed per line for this driver as it would waste memory.
- *	The driver will take care.
- *
- * TTY_DRIVER_UNNUMBERED_NODE -- do not create numbered /dev nodes. In
- *	other words create /dev/ttyprintk and not /dev/ttyprintk0.
- *	Applicable only when a driver for a single tty device is
- *	being allocated.
+/**
+ * DOC: TTY Driver Flags
+ *
+ * TTY_DRIVER_RESET_TERMIOS
+ *	Requests the tty layer to reset the termios setting when the last
+ *	process has closed the device. Used for PTYs, in particular.
+ *
+ * TTY_DRIVER_REAL_RAW
+ *	Indicates that the driver will guarantee not to set any special
+ *	character handling flags if this is set for the tty:
+ *
+ *	``(IGNBRK || (!BRKINT && !PARMRK)) && (IGNPAR || !INPCK)``
+ *
+ *	That is, if there is no reason for the driver to
+ *	send notifications of parity and break characters up to the line
+ *	driver, it won't do so.  This allows the line driver to optimize for
+ *	this case if this flag is set.  (Note that there is also a promise, if
+ *	the above case is true, not to signal overruns, either.)
+ *
+ * TTY_DRIVER_DYNAMIC_DEV
+ *	The individual tty devices need to be registered with a call to
+ *	tty_register_device() when the device is found in the system and
+ *	unregistered with a call to tty_unregister_device() so the devices will
+ *	be show up properly in sysfs.  If not set, all &tty_driver.num entries
+ *	will be created by the tty core in sysfs when tty_register_driver() is
+ *	called.  This is to be used by drivers that have tty devices that can
+ *	appear and disappear while the main tty driver is registered with the
+ *	tty core.
+ *
+ * TTY_DRIVER_DEVPTS_MEM
+ *	Don't use the standard arrays (&tty_driver.ttys and
+ *	&tty_driver.termios), instead use dynamic memory keyed through the
+ *	devpts filesystem. This is only applicable to the PTY driver.
+ *
+ * TTY_DRIVER_HARDWARE_BREAK
+ *	Hardware handles break signals. Pass the requested timeout to the
+ *	&tty_operations.break_ctl instead of using a simple on/off interface.
+ *
+ * TTY_DRIVER_DYNAMIC_ALLOC
+ *	Do not allocate structures which are needed per line for this driver
+ *	(&tty_driver.ports) as it would waste memory. The driver will take
+ *	care. This is only applicable to the PTY driver.
+ *
+ * TTY_DRIVER_UNNUMBERED_NODE
+ *	Do not create numbered ``/dev`` nodes. For example, create
+ *	``/dev/ttyprintk`` and not ``/dev/ttyprintk0``. Applicable only when a
+ *	driver for a single tty device is being allocated.
  */
 #define TTY_DRIVER_INSTALLED		0x0001
 #define TTY_DRIVER_RESET_TERMIOS	0x0002
-- 
cgit v1.2.3


From df0e68c1e9945e2ee86d266ce45597bbd8299b06 Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Wed, 17 Nov 2021 12:05:59 +0000
Subject: comedi: Move the main COMEDI headers

Move the main COMEDI driver headers out of "drivers/comedi/" into new
directory "include/linux/comedi/".  These are "comedidev.h",
"comedilib.h", "comedi_pci.h", "comedi_pcmcia.h", and "comedi_usb.h".
Additionally, move the user-space API header "comedi.h" into
"include/uapi/linux/" and add "WITH Linux-syscall-note" to its
SPDX-License-Identifier.

Update the "COMEDI DRIVERS" section of the MAINTAINERS file to account
for these changes.

Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Link: https://lore.kernel.org/r/20211117120604.117740-2-abbotti@mev.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/comedi/comedi_pci.h    |   56 ++
 include/linux/comedi/comedi_pcmcia.h |   48 ++
 include/linux/comedi/comedi_usb.h    |   41 +
 include/linux/comedi/comedidev.h     | 1053 +++++++++++++++++++++++
 include/linux/comedi/comedilib.h     |   26 +
 include/uapi/linux/comedi.h          | 1528 ++++++++++++++++++++++++++++++++++
 6 files changed, 2752 insertions(+)
 create mode 100644 include/linux/comedi/comedi_pci.h
 create mode 100644 include/linux/comedi/comedi_pcmcia.h
 create mode 100644 include/linux/comedi/comedi_usb.h
 create mode 100644 include/linux/comedi/comedidev.h
 create mode 100644 include/linux/comedi/comedilib.h
 create mode 100644 include/uapi/linux/comedi.h

(limited to 'include')

diff --git a/include/linux/comedi/comedi_pci.h b/include/linux/comedi/comedi_pci.h
new file mode 100644
index 000000000000..2fb50663e3ed
--- /dev/null
+++ b/include/linux/comedi/comedi_pci.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * comedi_pci.h
+ * header file for Comedi PCI drivers
+ *
+ * COMEDI - Linux Control and Measurement Device Interface
+ * Copyright (C) 1997-2000 David A. Schleef <ds@schleef.org>
+ */
+
+#ifndef _COMEDI_PCI_H
+#define _COMEDI_PCI_H
+
+#include <linux/pci.h>
+#include <linux/comedi/comedidev.h>
+
+/*
+ * PCI Vendor IDs not in <linux/pci_ids.h>
+ */
+#define PCI_VENDOR_ID_KOLTER		0x1001
+#define PCI_VENDOR_ID_ICP		0x104c
+#define PCI_VENDOR_ID_DT		0x1116
+#define PCI_VENDOR_ID_IOTECH		0x1616
+#define PCI_VENDOR_ID_CONTEC		0x1221
+#define PCI_VENDOR_ID_RTD		0x1435
+#define PCI_VENDOR_ID_HUMUSOFT		0x186c
+
+struct pci_dev *comedi_to_pci_dev(struct comedi_device *dev);
+
+int comedi_pci_enable(struct comedi_device *dev);
+void comedi_pci_disable(struct comedi_device *dev);
+void comedi_pci_detach(struct comedi_device *dev);
+
+int comedi_pci_auto_config(struct pci_dev *pcidev, struct comedi_driver *driver,
+			   unsigned long context);
+void comedi_pci_auto_unconfig(struct pci_dev *pcidev);
+
+int comedi_pci_driver_register(struct comedi_driver *comedi_driver,
+			       struct pci_driver *pci_driver);
+void comedi_pci_driver_unregister(struct comedi_driver *comedi_driver,
+				  struct pci_driver *pci_driver);
+
+/**
+ * module_comedi_pci_driver() - Helper macro for registering a comedi PCI driver
+ * @__comedi_driver: comedi_driver struct
+ * @__pci_driver: pci_driver struct
+ *
+ * Helper macro for comedi PCI drivers which do not do anything special
+ * in module init/exit. This eliminates a lot of boilerplate. Each
+ * module may only use this macro once, and calling it replaces
+ * module_init() and module_exit()
+ */
+#define module_comedi_pci_driver(__comedi_driver, __pci_driver) \
+	module_driver(__comedi_driver, comedi_pci_driver_register, \
+			comedi_pci_driver_unregister, &(__pci_driver))
+
+#endif /* _COMEDI_PCI_H */
diff --git a/include/linux/comedi/comedi_pcmcia.h b/include/linux/comedi/comedi_pcmcia.h
new file mode 100644
index 000000000000..a33dfb65b869
--- /dev/null
+++ b/include/linux/comedi/comedi_pcmcia.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * comedi_pcmcia.h
+ * header file for Comedi PCMCIA drivers
+ *
+ * COMEDI - Linux Control and Measurement Device Interface
+ * Copyright (C) 1997-2000 David A. Schleef <ds@schleef.org>
+ */
+
+#ifndef _COMEDI_PCMCIA_H
+#define _COMEDI_PCMCIA_H
+
+#include <pcmcia/cistpl.h>
+#include <pcmcia/ds.h>
+#include <linux/comedi/comedidev.h>
+
+struct pcmcia_device *comedi_to_pcmcia_dev(struct comedi_device *dev);
+
+int comedi_pcmcia_enable(struct comedi_device *dev,
+			 int (*conf_check)(struct pcmcia_device *p_dev,
+					   void *priv_data));
+void comedi_pcmcia_disable(struct comedi_device *dev);
+
+int comedi_pcmcia_auto_config(struct pcmcia_device *link,
+			      struct comedi_driver *driver);
+void comedi_pcmcia_auto_unconfig(struct pcmcia_device *link);
+
+int comedi_pcmcia_driver_register(struct comedi_driver *comedi_driver,
+				  struct pcmcia_driver *pcmcia_driver);
+void comedi_pcmcia_driver_unregister(struct comedi_driver *comedi_driver,
+				     struct pcmcia_driver *pcmcia_driver);
+
+/**
+ * module_comedi_pcmcia_driver() - Helper macro for registering a comedi
+ * PCMCIA driver
+ * @__comedi_driver: comedi_driver struct
+ * @__pcmcia_driver: pcmcia_driver struct
+ *
+ * Helper macro for comedi PCMCIA drivers which do not do anything special
+ * in module init/exit. This eliminates a lot of boilerplate. Each
+ * module may only use this macro once, and calling it replaces
+ * module_init() and module_exit()
+ */
+#define module_comedi_pcmcia_driver(__comedi_driver, __pcmcia_driver) \
+	module_driver(__comedi_driver, comedi_pcmcia_driver_register, \
+			comedi_pcmcia_driver_unregister, &(__pcmcia_driver))
+
+#endif /* _COMEDI_PCMCIA_H */
diff --git a/include/linux/comedi/comedi_usb.h b/include/linux/comedi/comedi_usb.h
new file mode 100644
index 000000000000..5d17dd425bd2
--- /dev/null
+++ b/include/linux/comedi/comedi_usb.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* comedi_usb.h
+ * header file for USB Comedi drivers
+ *
+ * COMEDI - Linux Control and Measurement Device Interface
+ * Copyright (C) 1997-2000 David A. Schleef <ds@schleef.org>
+ */
+
+#ifndef _COMEDI_USB_H
+#define _COMEDI_USB_H
+
+#include <linux/usb.h>
+#include <linux/comedi/comedidev.h>
+
+struct usb_interface *comedi_to_usb_interface(struct comedi_device *dev);
+struct usb_device *comedi_to_usb_dev(struct comedi_device *dev);
+
+int comedi_usb_auto_config(struct usb_interface *intf,
+			   struct comedi_driver *driver, unsigned long context);
+void comedi_usb_auto_unconfig(struct usb_interface *intf);
+
+int comedi_usb_driver_register(struct comedi_driver *comedi_driver,
+			       struct usb_driver *usb_driver);
+void comedi_usb_driver_unregister(struct comedi_driver *comedi_driver,
+				  struct usb_driver *usb_driver);
+
+/**
+ * module_comedi_usb_driver() - Helper macro for registering a comedi USB driver
+ * @__comedi_driver: comedi_driver struct
+ * @__usb_driver: usb_driver struct
+ *
+ * Helper macro for comedi USB drivers which do not do anything special
+ * in module init/exit. This eliminates a lot of boilerplate. Each
+ * module may only use this macro once, and calling it replaces
+ * module_init() and module_exit()
+ */
+#define module_comedi_usb_driver(__comedi_driver, __usb_driver) \
+	module_driver(__comedi_driver, comedi_usb_driver_register, \
+			comedi_usb_driver_unregister, &(__usb_driver))
+
+#endif /* _COMEDI_USB_H */
diff --git a/include/linux/comedi/comedidev.h b/include/linux/comedi/comedidev.h
new file mode 100644
index 000000000000..0a1150900ef3
--- /dev/null
+++ b/include/linux/comedi/comedidev.h
@@ -0,0 +1,1053 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * comedidev.h
+ * header file for kernel-only structures, variables, and constants
+ *
+ * COMEDI - Linux Control and Measurement Device Interface
+ * Copyright (C) 1997-2000 David A. Schleef <ds@schleef.org>
+ */
+
+#ifndef _COMEDIDEV_H
+#define _COMEDIDEV_H
+
+#include <linux/dma-mapping.h>
+#include <linux/mutex.h>
+#include <linux/spinlock_types.h>
+#include <linux/rwsem.h>
+#include <linux/kref.h>
+#include <linux/comedi.h>
+
+#define COMEDI_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c))
+#define COMEDI_VERSION_CODE COMEDI_VERSION(COMEDI_MAJORVERSION, \
+	COMEDI_MINORVERSION, COMEDI_MICROVERSION)
+#define COMEDI_RELEASE VERSION
+
+#define COMEDI_NUM_BOARD_MINORS 0x30
+
+/**
+ * struct comedi_subdevice - Working data for a COMEDI subdevice
+ * @device: COMEDI device to which this subdevice belongs.  (Initialized by
+ *	comedi_alloc_subdevices().)
+ * @index: Index of this subdevice within device's array of subdevices.
+ *	(Initialized by comedi_alloc_subdevices().)
+ * @type: Type of subdevice from &enum comedi_subdevice_type.  (Initialized by
+ *	the low-level driver.)
+ * @n_chan: Number of channels the subdevice supports.  (Initialized by the
+ *	low-level driver.)
+ * @subdev_flags: Various "SDF" flags indicating aspects of the subdevice to
+ *	the COMEDI core and user application.  (Initialized by the low-level
+ *	driver.)
+ * @len_chanlist: Maximum length of a channel list if the subdevice supports
+ *	asynchronous acquisition commands.  (Optionally initialized by the
+ *	low-level driver, or changed from 0 to 1 during post-configuration.)
+ * @private: Private data pointer which is either set by the low-level driver
+ *	itself, or by a call to comedi_alloc_spriv() which allocates storage.
+ *	In the latter case, the storage is automatically freed after the
+ *	low-level driver's "detach" handler is called for the device.
+ *	(Initialized by the low-level driver.)
+ * @async: Pointer to &struct comedi_async id the subdevice supports
+ *	asynchronous acquisition commands.  (Allocated and initialized during
+ *	post-configuration if needed.)
+ * @lock: Pointer to a file object that performed a %COMEDI_LOCK ioctl on the
+ *	subdevice.  (Initially NULL.)
+ * @busy: Pointer to a file object that is performing an asynchronous
+ *	acquisition command on the subdevice.  (Initially NULL.)
+ * @runflags: Internal flags for use by COMEDI core, mostly indicating whether
+ *	an asynchronous acquisition command is running.
+ * @spin_lock: Generic spin-lock for use by the COMEDI core and the low-level
+ *	driver.  (Initialized by comedi_alloc_subdevices().)
+ * @io_bits: Bit-mask indicating the channel directions for a DIO subdevice
+ *	with no more than 32 channels.  A '1' at a bit position indicates the
+ *	corresponding channel is configured as an output.  (Initialized by the
+ *	low-level driver for a DIO subdevice.  Forced to all-outputs during
+ *	post-configuration for a digital output subdevice.)
+ * @maxdata: If non-zero, this is the maximum raw data value of each channel.
+ *	If zero, the maximum data value is channel-specific.  (Initialized by
+ *	the low-level driver.)
+ * @maxdata_list: If the maximum data value is channel-specific, this points
+ *	to an array of maximum data values indexed by channel index.
+ *	(Initialized by the low-level driver.)
+ * @range_table: If non-NULL, this points to a COMEDI range table for the
+ *	subdevice.  If NULL, the range table is channel-specific.  (Initialized
+ *	by the low-level driver, will be set to an "invalid" range table during
+ *	post-configuration if @range_table and @range_table_list are both
+ *	NULL.)
+ * @range_table_list: If the COMEDI range table is channel-specific, this
+ *	points to an array of pointers to COMEDI range tables indexed by
+ *	channel number.  (Initialized by the low-level driver.)
+ * @chanlist: Not used.
+ * @insn_read: Optional pointer to a handler for the %INSN_READ instruction.
+ *	(Initialized by the low-level driver, or set to a default handler
+ *	during post-configuration.)
+ * @insn_write: Optional pointer to a handler for the %INSN_WRITE instruction.
+ *	(Initialized by the low-level driver, or set to a default handler
+ *	during post-configuration.)
+ * @insn_bits: Optional pointer to a handler for the %INSN_BITS instruction
+ *	for a digital input, digital output or digital input/output subdevice.
+ *	(Initialized by the low-level driver, or set to a default handler
+ *	during post-configuration.)
+ * @insn_config: Optional pointer to a handler for the %INSN_CONFIG
+ *	instruction.  (Initialized by the low-level driver, or set to a default
+ *	handler during post-configuration.)
+ * @do_cmd: If the subdevice supports asynchronous acquisition commands, this
+ *	points to a handler to set it up in hardware.  (Initialized by the
+ *	low-level driver.)
+ * @do_cmdtest: If the subdevice supports asynchronous acquisition commands,
+ *	this points to a handler used to check and possibly tweak a prospective
+ *	acquisition command without setting it up in hardware.  (Initialized by
+ *	the low-level driver.)
+ * @poll: If the subdevice supports asynchronous acquisition commands, this
+ *	is an optional pointer to a handler for the %COMEDI_POLL ioctl which
+ *	instructs the low-level driver to synchronize buffers.  (Initialized by
+ *	the low-level driver if needed.)
+ * @cancel: If the subdevice supports asynchronous acquisition commands, this
+ *	points to a handler used to terminate a running command.  (Initialized
+ *	by the low-level driver.)
+ * @buf_change: If the subdevice supports asynchronous acquisition commands,
+ *	this is an optional pointer to a handler that is called when the data
+ *	buffer for handling asynchronous commands is allocated or reallocated.
+ *	(Initialized by the low-level driver if needed.)
+ * @munge: If the subdevice supports asynchronous acquisition commands and
+ *	uses DMA to transfer data from the hardware to the acquisition buffer,
+ *	this points to a function used to "munge" the data values from the
+ *	hardware into the format expected by COMEDI.  (Initialized by the
+ *	low-level driver if needed.)
+ * @async_dma_dir: If the subdevice supports asynchronous acquisition commands
+ *	and uses DMA to transfer data from the hardware to the acquisition
+ *	buffer, this sets the DMA direction for the buffer. (initialized to
+ *	%DMA_NONE by comedi_alloc_subdevices() and changed by the low-level
+ *	driver if necessary.)
+ * @state: Handy bit-mask indicating the output states for a DIO or digital
+ *	output subdevice with no more than 32 channels. (Initialized by the
+ *	low-level driver.)
+ * @class_dev: If the subdevice supports asynchronous acquisition commands,
+ *	this points to a sysfs comediX_subdY device where X is the minor device
+ *	number of the COMEDI device and Y is the subdevice number.  The minor
+ *	device number for the sysfs device is allocated dynamically in the
+ *	range 48 to 255.  This is used to allow the COMEDI device to be opened
+ *	with a different default read or write subdevice.  (Allocated during
+ *	post-configuration if needed.)
+ * @minor: If @class_dev is set, this is its dynamically allocated minor
+ *	device number.  (Set during post-configuration if necessary.)
+ * @readback: Optional pointer to memory allocated by
+ *	comedi_alloc_subdev_readback() used to hold the values written to
+ *	analog output channels so they can be read back.  The storage is
+ *	automatically freed after the low-level driver's "detach" handler is
+ *	called for the device.  (Initialized by the low-level driver.)
+ *
+ * This is the main control structure for a COMEDI subdevice.  If the subdevice
+ * supports asynchronous acquisition commands, additional information is stored
+ * in the &struct comedi_async pointed to by @async.
+ *
+ * Most of the subdevice is initialized by the low-level driver's "attach" or
+ * "auto_attach" handlers but parts of it are initialized by
+ * comedi_alloc_subdevices(), and other parts are initialized during
+ * post-configuration on return from that handler.
+ *
+ * A low-level driver that sets @insn_bits for a digital input, digital output,
+ * or DIO subdevice may leave @insn_read and @insn_write uninitialized, in
+ * which case they will be set to a default handler during post-configuration
+ * that uses @insn_bits to emulate the %INSN_READ and %INSN_WRITE instructions.
+ */
+struct comedi_subdevice {
+	struct comedi_device *device;
+	int index;
+	int type;
+	int n_chan;
+	int subdev_flags;
+	int len_chanlist;	/* maximum length of channel/gain list */
+
+	void *private;
+
+	struct comedi_async *async;
+
+	void *lock;
+	void *busy;
+	unsigned int runflags;
+	spinlock_t spin_lock;	/* generic spin-lock for COMEDI and drivers */
+
+	unsigned int io_bits;
+
+	unsigned int maxdata;	/* if maxdata==0, use list */
+	const unsigned int *maxdata_list;	/* list is channel specific */
+
+	const struct comedi_lrange *range_table;
+	const struct comedi_lrange *const *range_table_list;
+
+	unsigned int *chanlist;	/* driver-owned chanlist (not used) */
+
+	int (*insn_read)(struct comedi_device *dev, struct comedi_subdevice *s,
+			 struct comedi_insn *insn, unsigned int *data);
+	int (*insn_write)(struct comedi_device *dev, struct comedi_subdevice *s,
+			  struct comedi_insn *insn, unsigned int *data);
+	int (*insn_bits)(struct comedi_device *dev, struct comedi_subdevice *s,
+			 struct comedi_insn *insn, unsigned int *data);
+	int (*insn_config)(struct comedi_device *dev,
+			   struct comedi_subdevice *s,
+			   struct comedi_insn *insn,
+			   unsigned int *data);
+
+	int (*do_cmd)(struct comedi_device *dev, struct comedi_subdevice *s);
+	int (*do_cmdtest)(struct comedi_device *dev,
+			  struct comedi_subdevice *s,
+			  struct comedi_cmd *cmd);
+	int (*poll)(struct comedi_device *dev, struct comedi_subdevice *s);
+	int (*cancel)(struct comedi_device *dev, struct comedi_subdevice *s);
+
+	/* called when the buffer changes */
+	int (*buf_change)(struct comedi_device *dev,
+			  struct comedi_subdevice *s);
+
+	void (*munge)(struct comedi_device *dev, struct comedi_subdevice *s,
+		      void *data, unsigned int num_bytes,
+		      unsigned int start_chan_index);
+	enum dma_data_direction async_dma_dir;
+
+	unsigned int state;
+
+	struct device *class_dev;
+	int minor;
+
+	unsigned int *readback;
+};
+
+/**
+ * struct comedi_buf_page - Describe a page of a COMEDI buffer
+ * @virt_addr: Kernel address of page.
+ * @dma_addr: DMA address of page if in DMA coherent memory.
+ */
+struct comedi_buf_page {
+	void *virt_addr;
+	dma_addr_t dma_addr;
+};
+
+/**
+ * struct comedi_buf_map - Describe pages in a COMEDI buffer
+ * @dma_hw_dev: Low-level hardware &struct device pointer copied from the
+ *	COMEDI device's hw_dev member.
+ * @page_list: Pointer to array of &struct comedi_buf_page, one for each
+ *	page in the buffer.
+ * @n_pages: Number of pages in the buffer.
+ * @dma_dir: DMA direction used to allocate pages of DMA coherent memory,
+ *	or %DMA_NONE if pages allocated from regular memory.
+ * @refcount: &struct kref reference counter used to free the buffer.
+ *
+ * A COMEDI data buffer is allocated as individual pages, either in
+ * conventional memory or DMA coherent memory, depending on the attached,
+ * low-level hardware device.  (The buffer pages also get mapped into the
+ * kernel's contiguous virtual address space pointed to by the 'prealloc_buf'
+ * member of &struct comedi_async.)
+ *
+ * The buffer is normally freed when the COMEDI device is detached from the
+ * low-level driver (which may happen due to device removal), but if it happens
+ * to be mmapped at the time, the pages cannot be freed until the buffer has
+ * been munmapped.  That is what the reference counter is for.  (The virtual
+ * address space pointed by 'prealloc_buf' is freed when the COMEDI device is
+ * detached.)
+ */
+struct comedi_buf_map {
+	struct device *dma_hw_dev;
+	struct comedi_buf_page *page_list;
+	unsigned int n_pages;
+	enum dma_data_direction dma_dir;
+	struct kref refcount;
+};
+
+/**
+ * struct comedi_async - Control data for asynchronous COMEDI commands
+ * @prealloc_buf: Kernel virtual address of allocated acquisition buffer.
+ * @prealloc_bufsz: Buffer size (in bytes).
+ * @buf_map: Map of buffer pages.
+ * @max_bufsize: Maximum allowed buffer size (in bytes).
+ * @buf_write_count: "Write completed" count (in bytes, modulo 2**32).
+ * @buf_write_alloc_count: "Allocated for writing" count (in bytes,
+ *	modulo 2**32).
+ * @buf_read_count: "Read completed" count (in bytes, modulo 2**32).
+ * @buf_read_alloc_count: "Allocated for reading" count (in bytes,
+ *	modulo 2**32).
+ * @buf_write_ptr: Buffer position for writer.
+ * @buf_read_ptr: Buffer position for reader.
+ * @cur_chan: Current position in chanlist for scan (for those drivers that
+ *	use it).
+ * @scans_done: The number of scans completed.
+ * @scan_progress: Amount received or sent for current scan (in bytes).
+ * @munge_chan: Current position in chanlist for "munging".
+ * @munge_count: "Munge" count (in bytes, modulo 2**32).
+ * @munge_ptr: Buffer position for "munging".
+ * @events: Bit-vector of events that have occurred.
+ * @cmd: Details of comedi command in progress.
+ * @wait_head: Task wait queue for file reader or writer.
+ * @cb_mask: Bit-vector of events that should wake waiting tasks.
+ * @inttrig: Software trigger function for command, or NULL.
+ *
+ * Note about the ..._count and ..._ptr members:
+ *
+ * Think of the _Count values being integers of unlimited size, indexing
+ * into a buffer of infinite length (though only an advancing portion
+ * of the buffer of fixed length prealloc_bufsz is accessible at any
+ * time).  Then:
+ *
+ *   Buf_Read_Count <= Buf_Read_Alloc_Count <= Munge_Count <=
+ *   Buf_Write_Count <= Buf_Write_Alloc_Count <=
+ *   (Buf_Read_Count + prealloc_bufsz)
+ *
+ * (Those aren't the actual members, apart from prealloc_bufsz.) When the
+ * buffer is reset, those _Count values start at 0 and only increase in value,
+ * maintaining the above inequalities until the next time the buffer is
+ * reset.  The buffer is divided into the following regions by the inequalities:
+ *
+ *   [0, Buf_Read_Count):
+ *     old region no longer accessible
+ *
+ *   [Buf_Read_Count, Buf_Read_Alloc_Count):
+ *     filled and munged region allocated for reading but not yet read
+ *
+ *   [Buf_Read_Alloc_Count, Munge_Count):
+ *     filled and munged region not yet allocated for reading
+ *
+ *   [Munge_Count, Buf_Write_Count):
+ *     filled region not yet munged
+ *
+ *   [Buf_Write_Count, Buf_Write_Alloc_Count):
+ *     unfilled region allocated for writing but not yet written
+ *
+ *   [Buf_Write_Alloc_Count, Buf_Read_Count + prealloc_bufsz):
+ *     unfilled region not yet allocated for writing
+ *
+ *   [Buf_Read_Count + prealloc_bufsz, infinity):
+ *     unfilled region not yet accessible
+ *
+ * Data needs to be written into the buffer before it can be read out,
+ * and may need to be converted (or "munged") between the two
+ * operations.  Extra unfilled buffer space may need to allocated for
+ * writing (advancing Buf_Write_Alloc_Count) before new data is written.
+ * After writing new data, the newly filled space needs to be released
+ * (advancing Buf_Write_Count).  This also results in the new data being
+ * "munged" (advancing Munge_Count).  Before data is read out of the
+ * buffer, extra space may need to be allocated for reading (advancing
+ * Buf_Read_Alloc_Count).  After the data has been read out, the space
+ * needs to be released (advancing Buf_Read_Count).
+ *
+ * The actual members, buf_read_count, buf_read_alloc_count,
+ * munge_count, buf_write_count, and buf_write_alloc_count take the
+ * value of the corresponding capitalized _Count values modulo 2^32
+ * (UINT_MAX+1).  Subtracting a "higher" _count value from a "lower"
+ * _count value gives the same answer as subtracting a "higher" _Count
+ * value from a lower _Count value because prealloc_bufsz < UINT_MAX+1.
+ * The modulo operation is done implicitly.
+ *
+ * The buf_read_ptr, munge_ptr, and buf_write_ptr members take the value
+ * of the corresponding capitalized _Count values modulo prealloc_bufsz.
+ * These correspond to byte indices in the physical buffer.  The modulo
+ * operation is done by subtracting prealloc_bufsz when the value
+ * exceeds prealloc_bufsz (assuming prealloc_bufsz plus the increment is
+ * less than or equal to UINT_MAX).
+ */
+struct comedi_async {
+	void *prealloc_buf;
+	unsigned int prealloc_bufsz;
+	struct comedi_buf_map *buf_map;
+	unsigned int max_bufsize;
+	unsigned int buf_write_count;
+	unsigned int buf_write_alloc_count;
+	unsigned int buf_read_count;
+	unsigned int buf_read_alloc_count;
+	unsigned int buf_write_ptr;
+	unsigned int buf_read_ptr;
+	unsigned int cur_chan;
+	unsigned int scans_done;
+	unsigned int scan_progress;
+	unsigned int munge_chan;
+	unsigned int munge_count;
+	unsigned int munge_ptr;
+	unsigned int events;
+	struct comedi_cmd cmd;
+	wait_queue_head_t wait_head;
+	unsigned int cb_mask;
+	int (*inttrig)(struct comedi_device *dev, struct comedi_subdevice *s,
+		       unsigned int x);
+};
+
+/**
+ * enum comedi_cb - &struct comedi_async callback "events"
+ * @COMEDI_CB_EOS:		end-of-scan
+ * @COMEDI_CB_EOA:		end-of-acquisition/output
+ * @COMEDI_CB_BLOCK:		data has arrived, wakes up read() / write()
+ * @COMEDI_CB_EOBUF:		DEPRECATED: end of buffer
+ * @COMEDI_CB_ERROR:		card error during acquisition
+ * @COMEDI_CB_OVERFLOW:		buffer overflow/underflow
+ * @COMEDI_CB_ERROR_MASK:	events that indicate an error has occurred
+ * @COMEDI_CB_CANCEL_MASK:	events that will cancel an async command
+ */
+enum comedi_cb {
+	COMEDI_CB_EOS		= BIT(0),
+	COMEDI_CB_EOA		= BIT(1),
+	COMEDI_CB_BLOCK		= BIT(2),
+	COMEDI_CB_EOBUF		= BIT(3),
+	COMEDI_CB_ERROR		= BIT(4),
+	COMEDI_CB_OVERFLOW	= BIT(5),
+	/* masks */
+	COMEDI_CB_ERROR_MASK	= (COMEDI_CB_ERROR | COMEDI_CB_OVERFLOW),
+	COMEDI_CB_CANCEL_MASK	= (COMEDI_CB_EOA | COMEDI_CB_ERROR_MASK)
+};
+
+/**
+ * struct comedi_driver - COMEDI driver registration
+ * @driver_name: Name of driver.
+ * @module: Owning module.
+ * @attach: The optional "attach" handler for manually configured COMEDI
+ *	devices.
+ * @detach: The "detach" handler for deconfiguring COMEDI devices.
+ * @auto_attach: The optional "auto_attach" handler for automatically
+ *	configured COMEDI devices.
+ * @num_names: Optional number of "board names" supported.
+ * @board_name: Optional pointer to a pointer to a board name.  The pointer
+ *	to a board name is embedded in an element of a driver-defined array
+ *	of static, read-only board type information.
+ * @offset: Optional size of each element of the driver-defined array of
+ *	static, read-only board type information, i.e. the offset between each
+ *	pointer to a board name.
+ *
+ * This is used with comedi_driver_register() and comedi_driver_unregister() to
+ * register and unregister a low-level COMEDI driver with the COMEDI core.
+ *
+ * If @num_names is non-zero, @board_name should be non-NULL, and @offset
+ * should be at least sizeof(*board_name).  These are used by the handler for
+ * the %COMEDI_DEVCONFIG ioctl to match a hardware device and its driver by
+ * board name.  If @num_names is zero, the %COMEDI_DEVCONFIG ioctl matches a
+ * hardware device and its driver by driver name.  This is only useful if the
+ * @attach handler is set.  If @num_names is non-zero, the driver's @attach
+ * handler will be called with the COMEDI device structure's board_ptr member
+ * pointing to the matched pointer to a board name within the driver's private
+ * array of static, read-only board type information.
+ *
+ * The @detach handler has two roles.  If a COMEDI device was successfully
+ * configured by the @attach or @auto_attach handler, it is called when the
+ * device is being deconfigured (by the %COMEDI_DEVCONFIG ioctl, or due to
+ * unloading of the driver, or due to device removal).  It is also called when
+ * the @attach or @auto_attach handler returns an error.  Therefore, the
+ * @attach or @auto_attach handlers can defer clean-up on error until the
+ * @detach handler is called.  If the @attach or @auto_attach handlers free
+ * any resources themselves, they must prevent the @detach handler from
+ * freeing the same resources.  The @detach handler must not assume that all
+ * resources requested by the @attach or @auto_attach handler were
+ * successfully allocated.
+ */
+struct comedi_driver {
+	/* private: */
+	struct comedi_driver *next;	/* Next in list of COMEDI drivers. */
+	/* public: */
+	const char *driver_name;
+	struct module *module;
+	int (*attach)(struct comedi_device *dev, struct comedi_devconfig *it);
+	void (*detach)(struct comedi_device *dev);
+	int (*auto_attach)(struct comedi_device *dev, unsigned long context);
+	unsigned int num_names;
+	const char *const *board_name;
+	int offset;
+};
+
+/**
+ * struct comedi_device - Working data for a COMEDI device
+ * @use_count: Number of open file objects.
+ * @driver: Low-level COMEDI driver attached to this COMEDI device.
+ * @pacer: Optional pointer to a dynamically allocated acquisition pacer
+ *	control.  It is freed automatically after the COMEDI device is
+ *	detached from the low-level driver.
+ * @private: Optional pointer to private data allocated by the low-level
+ *	driver.  It is freed automatically after the COMEDI device is
+ *	detached from the low-level driver.
+ * @class_dev: Sysfs comediX device.
+ * @minor: Minor device number of COMEDI char device (0-47).
+ * @detach_count: Counter incremented every time the COMEDI device is detached.
+ *	Used for checking a previous attachment is still valid.
+ * @hw_dev: Optional pointer to the low-level hardware &struct device.  It is
+ *	required for automatically configured COMEDI devices and optional for
+ *	COMEDI devices configured by the %COMEDI_DEVCONFIG ioctl, although
+ *	the bus-specific COMEDI functions only work if it is set correctly.
+ *	It is also passed to dma_alloc_coherent() for COMEDI subdevices that
+ *	have their 'async_dma_dir' member set to something other than
+ *	%DMA_NONE.
+ * @board_name: Pointer to a COMEDI board name or a COMEDI driver name.  When
+ *	the low-level driver's "attach" handler is called by the handler for
+ *	the %COMEDI_DEVCONFIG ioctl, it either points to a matched board name
+ *	string if the 'num_names' member of the &struct comedi_driver is
+ *	non-zero, otherwise it points to the low-level driver name string.
+ *	When the low-lever driver's "auto_attach" handler is called for an
+ *	automatically configured COMEDI device, it points to the low-level
+ *	driver name string.  The low-level driver is free to change it in its
+ *	"attach" or "auto_attach" handler if it wishes.
+ * @board_ptr: Optional pointer to private, read-only board type information in
+ *	the low-level driver.  If the 'num_names' member of the &struct
+ *	comedi_driver is non-zero, the handler for the %COMEDI_DEVCONFIG ioctl
+ *	will point it to a pointer to a matched board name string within the
+ *	driver's private array of static, read-only board type information when
+ *	calling the driver's "attach" handler.  The low-level driver is free to
+ *	change it.
+ * @attached: Flag indicating that the COMEDI device is attached to a low-level
+ *	driver.
+ * @ioenabled: Flag used to indicate that a PCI device has been enabled and
+ *	its regions requested.
+ * @spinlock: Generic spin-lock for use by the low-level driver.
+ * @mutex: Generic mutex for use by the COMEDI core module.
+ * @attach_lock: &struct rw_semaphore used to guard against the COMEDI device
+ *	being detached while an operation is in progress.  The down_write()
+ *	operation is only allowed while @mutex is held and is used when
+ *	changing @attached and @detach_count and calling the low-level driver's
+ *	"detach" handler.  The down_read() operation is generally used without
+ *	holding @mutex.
+ * @refcount: &struct kref reference counter for freeing COMEDI device.
+ * @n_subdevices: Number of COMEDI subdevices allocated by the low-level
+ *	driver for this device.
+ * @subdevices: Dynamically allocated array of COMEDI subdevices.
+ * @mmio: Optional pointer to a remapped MMIO region set by the low-level
+ *	driver.
+ * @iobase: Optional base of an I/O port region requested by the low-level
+ *	driver.
+ * @iolen: Length of I/O port region requested at @iobase.
+ * @irq: Optional IRQ number requested by the low-level driver.
+ * @read_subdev: Optional pointer to a default COMEDI subdevice operated on by
+ *	the read() file operation.  Set by the low-level driver.
+ * @write_subdev: Optional pointer to a default COMEDI subdevice operated on by
+ *	the write() file operation.  Set by the low-level driver.
+ * @async_queue: Storage for fasync_helper().
+ * @open: Optional pointer to a function set by the low-level driver to be
+ *	called when @use_count changes from 0 to 1.
+ * @close: Optional pointer to a function set by the low-level driver to be
+ *	called when @use_count changed from 1 to 0.
+ * @insn_device_config: Optional pointer to a handler for all sub-instructions
+ *	except %INSN_DEVICE_CONFIG_GET_ROUTES of the %INSN_DEVICE_CONFIG
+ *	instruction.  If this is not initialized by the low-level driver, a
+ *	default handler will be set during post-configuration.
+ * @get_valid_routes: Optional pointer to a handler for the
+ *	%INSN_DEVICE_CONFIG_GET_ROUTES sub-instruction of the
+ *	%INSN_DEVICE_CONFIG instruction set.  If this is not initialized by the
+ *	low-level driver, a default handler that copies zero routes back to the
+ *	user will be used.
+ *
+ * This is the main control data structure for a COMEDI device (as far as the
+ * COMEDI core is concerned).  There are two groups of COMEDI devices -
+ * "legacy" devices that are configured by the handler for the
+ * %COMEDI_DEVCONFIG ioctl, and automatically configured devices resulting
+ * from a call to comedi_auto_config() as a result of a bus driver probe in
+ * a low-level COMEDI driver.  The "legacy" COMEDI devices are allocated
+ * during module initialization if the "comedi_num_legacy_minors" module
+ * parameter is non-zero and use minor device numbers from 0 to
+ * comedi_num_legacy_minors minus one.  The automatically configured COMEDI
+ * devices are allocated on demand and use minor device numbers from
+ * comedi_num_legacy_minors to 47.
+ */
+struct comedi_device {
+	int use_count;
+	struct comedi_driver *driver;
+	struct comedi_8254 *pacer;
+	void *private;
+
+	struct device *class_dev;
+	int minor;
+	unsigned int detach_count;
+	struct device *hw_dev;
+
+	const char *board_name;
+	const void *board_ptr;
+	unsigned int attached:1;
+	unsigned int ioenabled:1;
+	spinlock_t spinlock;	/* generic spin-lock for low-level driver */
+	struct mutex mutex;	/* generic mutex for COMEDI core */
+	struct rw_semaphore attach_lock;
+	struct kref refcount;
+
+	int n_subdevices;
+	struct comedi_subdevice *subdevices;
+
+	/* dumb */
+	void __iomem *mmio;
+	unsigned long iobase;
+	unsigned long iolen;
+	unsigned int irq;
+
+	struct comedi_subdevice *read_subdev;
+	struct comedi_subdevice *write_subdev;
+
+	struct fasync_struct *async_queue;
+
+	int (*open)(struct comedi_device *dev);
+	void (*close)(struct comedi_device *dev);
+	int (*insn_device_config)(struct comedi_device *dev,
+				  struct comedi_insn *insn, unsigned int *data);
+	unsigned int (*get_valid_routes)(struct comedi_device *dev,
+					 unsigned int n_pairs,
+					 unsigned int *pair_data);
+};
+
+/*
+ * function prototypes
+ */
+
+void comedi_event(struct comedi_device *dev, struct comedi_subdevice *s);
+
+struct comedi_device *comedi_dev_get_from_minor(unsigned int minor);
+int comedi_dev_put(struct comedi_device *dev);
+
+bool comedi_is_subdevice_running(struct comedi_subdevice *s);
+
+void *comedi_alloc_spriv(struct comedi_subdevice *s, size_t size);
+void comedi_set_spriv_auto_free(struct comedi_subdevice *s);
+
+int comedi_check_chanlist(struct comedi_subdevice *s,
+			  int n,
+			  unsigned int *chanlist);
+
+/* range stuff */
+
+#define RANGE(a, b)		{(a) * 1e6, (b) * 1e6, 0}
+#define RANGE_ext(a, b)		{(a) * 1e6, (b) * 1e6, RF_EXTERNAL}
+#define RANGE_mA(a, b)		{(a) * 1e6, (b) * 1e6, UNIT_mA}
+#define RANGE_unitless(a, b)	{(a) * 1e6, (b) * 1e6, 0}
+#define BIP_RANGE(a)		{-(a) * 1e6, (a) * 1e6, 0}
+#define UNI_RANGE(a)		{0, (a) * 1e6, 0}
+
+extern const struct comedi_lrange range_bipolar10;
+extern const struct comedi_lrange range_bipolar5;
+extern const struct comedi_lrange range_bipolar2_5;
+extern const struct comedi_lrange range_unipolar10;
+extern const struct comedi_lrange range_unipolar5;
+extern const struct comedi_lrange range_unipolar2_5;
+extern const struct comedi_lrange range_0_20mA;
+extern const struct comedi_lrange range_4_20mA;
+extern const struct comedi_lrange range_0_32mA;
+extern const struct comedi_lrange range_unknown;
+
+#define range_digital		range_unipolar5
+
+/**
+ * struct comedi_lrange - Describes a COMEDI range table
+ * @length: Number of entries in the range table.
+ * @range: Array of &struct comedi_krange, one for each range.
+ *
+ * Each element of @range[] describes the minimum and maximum physical range
+ * and the type of units.  Typically, the type of unit is %UNIT_volt
+ * (i.e. volts) and the minimum and maximum are in millionths of a volt.
+ * There may also be a flag that indicates the minimum and maximum are merely
+ * scale factors for an unknown, external reference.
+ */
+struct comedi_lrange {
+	int length;
+	struct comedi_krange range[];
+};
+
+/**
+ * comedi_range_is_bipolar() - Test if subdevice range is bipolar
+ * @s: COMEDI subdevice.
+ * @range: Index of range within a range table.
+ *
+ * Tests whether a range is bipolar by checking whether its minimum value
+ * is negative.
+ *
+ * Assumes @range is valid.  Does not work for subdevices using a
+ * channel-specific range table list.
+ *
+ * Return:
+ *	%true if the range is bipolar.
+ *	%false if the range is unipolar.
+ */
+static inline bool comedi_range_is_bipolar(struct comedi_subdevice *s,
+					   unsigned int range)
+{
+	return s->range_table->range[range].min < 0;
+}
+
+/**
+ * comedi_range_is_unipolar() - Test if subdevice range is unipolar
+ * @s: COMEDI subdevice.
+ * @range: Index of range within a range table.
+ *
+ * Tests whether a range is unipolar by checking whether its minimum value
+ * is at least 0.
+ *
+ * Assumes @range is valid.  Does not work for subdevices using a
+ * channel-specific range table list.
+ *
+ * Return:
+ *	%true if the range is unipolar.
+ *	%false if the range is bipolar.
+ */
+static inline bool comedi_range_is_unipolar(struct comedi_subdevice *s,
+					    unsigned int range)
+{
+	return s->range_table->range[range].min >= 0;
+}
+
+/**
+ * comedi_range_is_external() - Test if subdevice range is external
+ * @s: COMEDI subdevice.
+ * @range: Index of range within a range table.
+ *
+ * Tests whether a range is externally reference by checking whether its
+ * %RF_EXTERNAL flag is set.
+ *
+ * Assumes @range is valid.  Does not work for subdevices using a
+ * channel-specific range table list.
+ *
+ * Return:
+ *	%true if the range is external.
+ *	%false if the range is internal.
+ */
+static inline bool comedi_range_is_external(struct comedi_subdevice *s,
+					    unsigned int range)
+{
+	return !!(s->range_table->range[range].flags & RF_EXTERNAL);
+}
+
+/**
+ * comedi_chan_range_is_bipolar() - Test if channel-specific range is bipolar
+ * @s: COMEDI subdevice.
+ * @chan: The channel number.
+ * @range: Index of range within a range table.
+ *
+ * Tests whether a range is bipolar by checking whether its minimum value
+ * is negative.
+ *
+ * Assumes @chan and @range are valid.  Only works for subdevices with a
+ * channel-specific range table list.
+ *
+ * Return:
+ *	%true if the range is bipolar.
+ *	%false if the range is unipolar.
+ */
+static inline bool comedi_chan_range_is_bipolar(struct comedi_subdevice *s,
+						unsigned int chan,
+						unsigned int range)
+{
+	return s->range_table_list[chan]->range[range].min < 0;
+}
+
+/**
+ * comedi_chan_range_is_unipolar() - Test if channel-specific range is unipolar
+ * @s: COMEDI subdevice.
+ * @chan: The channel number.
+ * @range: Index of range within a range table.
+ *
+ * Tests whether a range is unipolar by checking whether its minimum value
+ * is at least 0.
+ *
+ * Assumes @chan and @range are valid.  Only works for subdevices with a
+ * channel-specific range table list.
+ *
+ * Return:
+ *	%true if the range is unipolar.
+ *	%false if the range is bipolar.
+ */
+static inline bool comedi_chan_range_is_unipolar(struct comedi_subdevice *s,
+						 unsigned int chan,
+						 unsigned int range)
+{
+	return s->range_table_list[chan]->range[range].min >= 0;
+}
+
+/**
+ * comedi_chan_range_is_external() - Test if channel-specific range is external
+ * @s: COMEDI subdevice.
+ * @chan: The channel number.
+ * @range: Index of range within a range table.
+ *
+ * Tests whether a range is externally reference by checking whether its
+ * %RF_EXTERNAL flag is set.
+ *
+ * Assumes @chan and @range are valid.  Only works for subdevices with a
+ * channel-specific range table list.
+ *
+ * Return:
+ *	%true if the range is bipolar.
+ *	%false if the range is unipolar.
+ */
+static inline bool comedi_chan_range_is_external(struct comedi_subdevice *s,
+						 unsigned int chan,
+						 unsigned int range)
+{
+	return !!(s->range_table_list[chan]->range[range].flags & RF_EXTERNAL);
+}
+
+/**
+ * comedi_offset_munge() - Convert between offset binary and 2's complement
+ * @s: COMEDI subdevice.
+ * @val: Value to be converted.
+ *
+ * Toggles the highest bit of a sample value to toggle between offset binary
+ * and 2's complement.  Assumes that @s->maxdata is a power of 2 minus 1.
+ *
+ * Return: The converted value.
+ */
+static inline unsigned int comedi_offset_munge(struct comedi_subdevice *s,
+					       unsigned int val)
+{
+	return val ^ s->maxdata ^ (s->maxdata >> 1);
+}
+
+/**
+ * comedi_bytes_per_sample() - Determine subdevice sample size
+ * @s: COMEDI subdevice.
+ *
+ * The sample size will be 4 (sizeof int) or 2 (sizeof short) depending on
+ * whether the %SDF_LSAMPL subdevice flag is set or not.
+ *
+ * Return: The subdevice sample size.
+ */
+static inline unsigned int comedi_bytes_per_sample(struct comedi_subdevice *s)
+{
+	return s->subdev_flags & SDF_LSAMPL ? sizeof(int) : sizeof(short);
+}
+
+/**
+ * comedi_sample_shift() - Determine log2 of subdevice sample size
+ * @s: COMEDI subdevice.
+ *
+ * The sample size will be 4 (sizeof int) or 2 (sizeof short) depending on
+ * whether the %SDF_LSAMPL subdevice flag is set or not.  The log2 of the
+ * sample size will be 2 or 1 and can be used as the right operand of a
+ * bit-shift operator to multiply or divide something by the sample size.
+ *
+ * Return: log2 of the subdevice sample size.
+ */
+static inline unsigned int comedi_sample_shift(struct comedi_subdevice *s)
+{
+	return s->subdev_flags & SDF_LSAMPL ? 2 : 1;
+}
+
+/**
+ * comedi_bytes_to_samples() - Convert a number of bytes to a number of samples
+ * @s: COMEDI subdevice.
+ * @nbytes: Number of bytes
+ *
+ * Return: The number of bytes divided by the subdevice sample size.
+ */
+static inline unsigned int comedi_bytes_to_samples(struct comedi_subdevice *s,
+						   unsigned int nbytes)
+{
+	return nbytes >> comedi_sample_shift(s);
+}
+
+/**
+ * comedi_samples_to_bytes() - Convert a number of samples to a number of bytes
+ * @s: COMEDI subdevice.
+ * @nsamples: Number of samples.
+ *
+ * Return: The number of samples multiplied by the subdevice sample size.
+ * (Does not check for arithmetic overflow.)
+ */
+static inline unsigned int comedi_samples_to_bytes(struct comedi_subdevice *s,
+						   unsigned int nsamples)
+{
+	return nsamples << comedi_sample_shift(s);
+}
+
+/**
+ * comedi_check_trigger_src() - Trivially validate a comedi_cmd trigger source
+ * @src: Pointer to the trigger source to validate.
+ * @flags: Bitmask of valid %TRIG_* for the trigger.
+ *
+ * This is used in "step 1" of the do_cmdtest functions of comedi drivers
+ * to validate the comedi_cmd triggers. The mask of the @src against the
+ * @flags allows the userspace comedilib to pass all the comedi_cmd
+ * triggers as %TRIG_ANY and get back a bitmask of the valid trigger sources.
+ *
+ * Return:
+ *	0 if trigger sources in *@src are all supported.
+ *	-EINVAL if any trigger source in *@src is unsupported.
+ */
+static inline int comedi_check_trigger_src(unsigned int *src,
+					   unsigned int flags)
+{
+	unsigned int orig_src = *src;
+
+	*src = orig_src & flags;
+	if (*src == TRIG_INVALID || *src != orig_src)
+		return -EINVAL;
+	return 0;
+}
+
+/**
+ * comedi_check_trigger_is_unique() - Make sure a trigger source is unique
+ * @src: The trigger source to check.
+ *
+ * Return:
+ *	0 if no more than one trigger source is set.
+ *	-EINVAL if more than one trigger source is set.
+ */
+static inline int comedi_check_trigger_is_unique(unsigned int src)
+{
+	/* this test is true if more than one _src bit is set */
+	if ((src & (src - 1)) != 0)
+		return -EINVAL;
+	return 0;
+}
+
+/**
+ * comedi_check_trigger_arg_is() - Trivially validate a trigger argument
+ * @arg: Pointer to the trigger arg to validate.
+ * @val: The value the argument should be.
+ *
+ * Forces *@arg to be @val.
+ *
+ * Return:
+ *	0 if *@arg was already @val.
+ *	-EINVAL if *@arg differed from @val.
+ */
+static inline int comedi_check_trigger_arg_is(unsigned int *arg,
+					      unsigned int val)
+{
+	if (*arg != val) {
+		*arg = val;
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * comedi_check_trigger_arg_min() - Trivially validate a trigger argument min
+ * @arg: Pointer to the trigger arg to validate.
+ * @val: The minimum value the argument should be.
+ *
+ * Forces *@arg to be at least @val, setting it to @val if necessary.
+ *
+ * Return:
+ *	0 if *@arg was already at least @val.
+ *	-EINVAL if *@arg was less than @val.
+ */
+static inline int comedi_check_trigger_arg_min(unsigned int *arg,
+					       unsigned int val)
+{
+	if (*arg < val) {
+		*arg = val;
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * comedi_check_trigger_arg_max() - Trivially validate a trigger argument max
+ * @arg: Pointer to the trigger arg to validate.
+ * @val: The maximum value the argument should be.
+ *
+ * Forces *@arg to be no more than @val, setting it to @val if necessary.
+ *
+ * Return:
+ *	0 if*@arg was already no more than @val.
+ *	-EINVAL if *@arg was greater than @val.
+ */
+static inline int comedi_check_trigger_arg_max(unsigned int *arg,
+					       unsigned int val)
+{
+	if (*arg > val) {
+		*arg = val;
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Must set dev->hw_dev if you wish to dma directly into comedi's buffer.
+ * Also useful for retrieving a previously configured hardware device of
+ * known bus type.  Set automatically for auto-configured devices.
+ * Automatically set to NULL when detaching hardware device.
+ */
+int comedi_set_hw_dev(struct comedi_device *dev, struct device *hw_dev);
+
+/**
+ * comedi_buf_n_bytes_ready - Determine amount of unread data in buffer
+ * @s: COMEDI subdevice.
+ *
+ * Determines the number of bytes of unread data in the asynchronous
+ * acquisition data buffer for a subdevice.  The data in question might not
+ * have been fully "munged" yet.
+ *
+ * Returns: The amount of unread data in bytes.
+ */
+static inline unsigned int comedi_buf_n_bytes_ready(struct comedi_subdevice *s)
+{
+	return s->async->buf_write_count - s->async->buf_read_count;
+}
+
+unsigned int comedi_buf_write_alloc(struct comedi_subdevice *s, unsigned int n);
+unsigned int comedi_buf_write_free(struct comedi_subdevice *s, unsigned int n);
+
+unsigned int comedi_buf_read_n_available(struct comedi_subdevice *s);
+unsigned int comedi_buf_read_alloc(struct comedi_subdevice *s, unsigned int n);
+unsigned int comedi_buf_read_free(struct comedi_subdevice *s, unsigned int n);
+
+unsigned int comedi_buf_write_samples(struct comedi_subdevice *s,
+				      const void *data, unsigned int nsamples);
+unsigned int comedi_buf_read_samples(struct comedi_subdevice *s,
+				     void *data, unsigned int nsamples);
+
+/* drivers.c - general comedi driver functions */
+
+#define COMEDI_TIMEOUT_MS	1000
+
+int comedi_timeout(struct comedi_device *dev, struct comedi_subdevice *s,
+		   struct comedi_insn *insn,
+		   int (*cb)(struct comedi_device *dev,
+			     struct comedi_subdevice *s,
+			     struct comedi_insn *insn, unsigned long context),
+		   unsigned long context);
+
+unsigned int comedi_handle_events(struct comedi_device *dev,
+				  struct comedi_subdevice *s);
+
+int comedi_dio_insn_config(struct comedi_device *dev,
+			   struct comedi_subdevice *s,
+			   struct comedi_insn *insn, unsigned int *data,
+			   unsigned int mask);
+unsigned int comedi_dio_update_state(struct comedi_subdevice *s,
+				     unsigned int *data);
+unsigned int comedi_bytes_per_scan_cmd(struct comedi_subdevice *s,
+				       struct comedi_cmd *cmd);
+unsigned int comedi_bytes_per_scan(struct comedi_subdevice *s);
+unsigned int comedi_nscans_left(struct comedi_subdevice *s,
+				unsigned int nscans);
+unsigned int comedi_nsamples_left(struct comedi_subdevice *s,
+				  unsigned int nsamples);
+void comedi_inc_scan_progress(struct comedi_subdevice *s,
+			      unsigned int num_bytes);
+
+void *comedi_alloc_devpriv(struct comedi_device *dev, size_t size);
+int comedi_alloc_subdevices(struct comedi_device *dev, int num_subdevices);
+int comedi_alloc_subdev_readback(struct comedi_subdevice *s);
+
+int comedi_readback_insn_read(struct comedi_device *dev,
+			      struct comedi_subdevice *s,
+			      struct comedi_insn *insn, unsigned int *data);
+
+int comedi_load_firmware(struct comedi_device *dev, struct device *hw_dev,
+			 const char *name,
+			 int (*cb)(struct comedi_device *dev,
+				   const u8 *data, size_t size,
+				   unsigned long context),
+			 unsigned long context);
+
+int __comedi_request_region(struct comedi_device *dev,
+			    unsigned long start, unsigned long len);
+int comedi_request_region(struct comedi_device *dev,
+			  unsigned long start, unsigned long len);
+void comedi_legacy_detach(struct comedi_device *dev);
+
+int comedi_auto_config(struct device *hardware_device,
+		       struct comedi_driver *driver, unsigned long context);
+void comedi_auto_unconfig(struct device *hardware_device);
+
+int comedi_driver_register(struct comedi_driver *driver);
+void comedi_driver_unregister(struct comedi_driver *driver);
+
+/**
+ * module_comedi_driver() - Helper macro for registering a comedi driver
+ * @__comedi_driver: comedi_driver struct
+ *
+ * Helper macro for comedi drivers which do not do anything special in module
+ * init/exit. This eliminates a lot of boilerplate. Each module may only use
+ * this macro once, and calling it replaces module_init() and module_exit().
+ */
+#define module_comedi_driver(__comedi_driver) \
+	module_driver(__comedi_driver, comedi_driver_register, \
+			comedi_driver_unregister)
+
+#endif /* _COMEDIDEV_H */
diff --git a/include/linux/comedi/comedilib.h b/include/linux/comedi/comedilib.h
new file mode 100644
index 000000000000..0223c9cd9215
--- /dev/null
+++ b/include/linux/comedi/comedilib.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * comedilib.h
+ * Header file for kcomedilib
+ *
+ * COMEDI - Linux Control and Measurement Device Interface
+ * Copyright (C) 1998-2001 David A. Schleef <ds@schleef.org>
+ */
+
+#ifndef _LINUX_COMEDILIB_H
+#define _LINUX_COMEDILIB_H
+
+struct comedi_device *comedi_open(const char *path);
+int comedi_close(struct comedi_device *dev);
+int comedi_dio_get_config(struct comedi_device *dev, unsigned int subdev,
+			  unsigned int chan, unsigned int *io);
+int comedi_dio_config(struct comedi_device *dev, unsigned int subdev,
+		      unsigned int chan, unsigned int io);
+int comedi_dio_bitfield2(struct comedi_device *dev, unsigned int subdev,
+			 unsigned int mask, unsigned int *bits,
+			 unsigned int base_channel);
+int comedi_find_subdevice_by_type(struct comedi_device *dev, int type,
+				  unsigned int subd);
+int comedi_get_n_channels(struct comedi_device *dev, unsigned int subdevice);
+
+#endif
diff --git a/include/uapi/linux/comedi.h b/include/uapi/linux/comedi.h
new file mode 100644
index 000000000000..7314e5ee0a1e
--- /dev/null
+++ b/include/uapi/linux/comedi.h
@@ -0,0 +1,1528 @@
+/* SPDX-License-Identifier: LGPL-2.0+ WITH Linux-syscall-note */
+/*
+ * comedi.h
+ * header file for COMEDI user API
+ *
+ * COMEDI - Linux Control and Measurement Device Interface
+ * Copyright (C) 1998-2001 David A. Schleef <ds@schleef.org>
+ */
+
+#ifndef _COMEDI_H
+#define _COMEDI_H
+
+#define COMEDI_MAJORVERSION	0
+#define COMEDI_MINORVERSION	7
+#define COMEDI_MICROVERSION	76
+#define VERSION	"0.7.76"
+
+/* comedi's major device number */
+#define COMEDI_MAJOR 98
+
+/*
+ * maximum number of minor devices.  This can be increased, although
+ * kernel structures are currently statically allocated, thus you
+ * don't want this to be much more than you actually use.
+ */
+#define COMEDI_NDEVICES 16
+
+/* number of config options in the config structure */
+#define COMEDI_NDEVCONFOPTS 32
+
+/*
+ * NOTE: 'comedi_config --init-data' is deprecated
+ *
+ * The following indexes in the config options were used by
+ * comedi_config to pass firmware blobs from user space to the
+ * comedi drivers. The request_firmware() hotplug interface is
+ * now used by all comedi drivers instead.
+ */
+
+/* length of nth chunk of firmware data -*/
+#define COMEDI_DEVCONF_AUX_DATA3_LENGTH		25
+#define COMEDI_DEVCONF_AUX_DATA2_LENGTH		26
+#define COMEDI_DEVCONF_AUX_DATA1_LENGTH		27
+#define COMEDI_DEVCONF_AUX_DATA0_LENGTH		28
+/* most significant 32 bits of pointer address (if needed) */
+#define COMEDI_DEVCONF_AUX_DATA_HI		29
+/* least significant 32 bits of pointer address */
+#define COMEDI_DEVCONF_AUX_DATA_LO		30
+#define COMEDI_DEVCONF_AUX_DATA_LENGTH		31	/* total data length */
+
+/* max length of device and driver names */
+#define COMEDI_NAMELEN 20
+
+/* packs and unpacks a channel/range number */
+
+#define CR_PACK(chan, rng, aref)					\
+	((((aref) & 0x3) << 24) | (((rng) & 0xff) << 16) | (chan))
+#define CR_PACK_FLAGS(chan, range, aref, flags)				\
+	(CR_PACK(chan, range, aref) | ((flags) & CR_FLAGS_MASK))
+
+#define CR_CHAN(a)	((a) & 0xffff)
+#define CR_RANGE(a)	(((a) >> 16) & 0xff)
+#define CR_AREF(a)	(((a) >> 24) & 0x03)
+
+#define CR_FLAGS_MASK	0xfc000000
+#define CR_ALT_FILTER	0x04000000
+#define CR_DITHER	CR_ALT_FILTER
+#define CR_DEGLITCH	CR_ALT_FILTER
+#define CR_ALT_SOURCE	0x08000000
+#define CR_EDGE		0x40000000
+#define CR_INVERT	0x80000000
+
+#define AREF_GROUND	0x00	/* analog ref = analog ground */
+#define AREF_COMMON	0x01	/* analog ref = analog common */
+#define AREF_DIFF	0x02	/* analog ref = differential */
+#define AREF_OTHER	0x03	/* analog ref = other (undefined) */
+
+/* counters -- these are arbitrary values */
+#define GPCT_RESET		0x0001
+#define GPCT_SET_SOURCE		0x0002
+#define GPCT_SET_GATE		0x0004
+#define GPCT_SET_DIRECTION	0x0008
+#define GPCT_SET_OPERATION	0x0010
+#define GPCT_ARM		0x0020
+#define GPCT_DISARM		0x0040
+#define GPCT_GET_INT_CLK_FRQ	0x0080
+
+#define GPCT_INT_CLOCK		0x0001
+#define GPCT_EXT_PIN		0x0002
+#define GPCT_NO_GATE		0x0004
+#define GPCT_UP			0x0008
+#define GPCT_DOWN		0x0010
+#define GPCT_HWUD		0x0020
+#define GPCT_SIMPLE_EVENT	0x0040
+#define GPCT_SINGLE_PERIOD	0x0080
+#define GPCT_SINGLE_PW		0x0100
+#define GPCT_CONT_PULSE_OUT	0x0200
+#define GPCT_SINGLE_PULSE_OUT	0x0400
+
+/* instructions */
+
+#define INSN_MASK_WRITE		0x8000000
+#define INSN_MASK_READ		0x4000000
+#define INSN_MASK_SPECIAL	0x2000000
+
+#define INSN_READ		(0 | INSN_MASK_READ)
+#define INSN_WRITE		(1 | INSN_MASK_WRITE)
+#define INSN_BITS		(2 | INSN_MASK_READ | INSN_MASK_WRITE)
+#define INSN_CONFIG		(3 | INSN_MASK_READ | INSN_MASK_WRITE)
+#define INSN_DEVICE_CONFIG	(INSN_CONFIG | INSN_MASK_SPECIAL)
+#define INSN_GTOD		(4 | INSN_MASK_READ | INSN_MASK_SPECIAL)
+#define INSN_WAIT		(5 | INSN_MASK_WRITE | INSN_MASK_SPECIAL)
+#define INSN_INTTRIG		(6 | INSN_MASK_WRITE | INSN_MASK_SPECIAL)
+
+/* command flags */
+/* These flags are used in comedi_cmd structures */
+
+#define CMDF_BOGUS		0x00000001	/* do the motions */
+
+/* try to use a real-time interrupt while performing command */
+#define CMDF_PRIORITY		0x00000008
+
+/* wake up on end-of-scan events */
+#define CMDF_WAKE_EOS		0x00000020
+
+#define CMDF_WRITE		0x00000040
+
+#define CMDF_RAWDATA		0x00000080
+
+/* timer rounding definitions */
+#define CMDF_ROUND_MASK		0x00030000
+#define CMDF_ROUND_NEAREST	0x00000000
+#define CMDF_ROUND_DOWN		0x00010000
+#define CMDF_ROUND_UP		0x00020000
+#define CMDF_ROUND_UP_NEXT	0x00030000
+
+#define COMEDI_EV_START		0x00040000
+#define COMEDI_EV_SCAN_BEGIN	0x00080000
+#define COMEDI_EV_CONVERT	0x00100000
+#define COMEDI_EV_SCAN_END	0x00200000
+#define COMEDI_EV_STOP		0x00400000
+
+/* compatibility definitions */
+#define TRIG_BOGUS		CMDF_BOGUS
+#define TRIG_RT			CMDF_PRIORITY
+#define TRIG_WAKE_EOS		CMDF_WAKE_EOS
+#define TRIG_WRITE		CMDF_WRITE
+#define TRIG_ROUND_MASK		CMDF_ROUND_MASK
+#define TRIG_ROUND_NEAREST	CMDF_ROUND_NEAREST
+#define TRIG_ROUND_DOWN		CMDF_ROUND_DOWN
+#define TRIG_ROUND_UP		CMDF_ROUND_UP
+#define TRIG_ROUND_UP_NEXT	CMDF_ROUND_UP_NEXT
+
+/* trigger sources */
+
+#define TRIG_ANY	0xffffffff
+#define TRIG_INVALID	0x00000000
+
+#define TRIG_NONE	0x00000001 /* never trigger */
+#define TRIG_NOW	0x00000002 /* trigger now + N ns */
+#define TRIG_FOLLOW	0x00000004 /* trigger on next lower level trig */
+#define TRIG_TIME	0x00000008 /* trigger at time N ns */
+#define TRIG_TIMER	0x00000010 /* trigger at rate N ns */
+#define TRIG_COUNT	0x00000020 /* trigger when count reaches N */
+#define TRIG_EXT	0x00000040 /* trigger on external signal N */
+#define TRIG_INT	0x00000080 /* trigger on comedi-internal signal N */
+#define TRIG_OTHER	0x00000100 /* driver defined */
+
+/* subdevice flags */
+
+#define SDF_BUSY	0x0001	/* device is busy */
+#define SDF_BUSY_OWNER	0x0002	/* device is busy with your job */
+#define SDF_LOCKED	0x0004	/* subdevice is locked */
+#define SDF_LOCK_OWNER	0x0008	/* you own lock */
+#define SDF_MAXDATA	0x0010	/* maxdata depends on channel */
+#define SDF_FLAGS	0x0020	/* flags depend on channel */
+#define SDF_RANGETYPE	0x0040	/* range type depends on channel */
+#define SDF_PWM_COUNTER 0x0080	/* PWM can automatically switch off */
+#define SDF_PWM_HBRIDGE 0x0100	/* PWM is signed (H-bridge) */
+#define SDF_CMD		0x1000	/* can do commands (deprecated) */
+#define SDF_SOFT_CALIBRATED	0x2000 /* subdevice uses software calibration */
+#define SDF_CMD_WRITE		0x4000 /* can do output commands */
+#define SDF_CMD_READ		0x8000 /* can do input commands */
+
+/* subdevice can be read (e.g. analog input) */
+#define SDF_READABLE	0x00010000
+/* subdevice can be written (e.g. analog output) */
+#define SDF_WRITABLE	0x00020000
+#define SDF_WRITEABLE	SDF_WRITABLE	/* spelling error in API */
+/* subdevice does not have externally visible lines */
+#define SDF_INTERNAL	0x00040000
+#define SDF_GROUND	0x00100000	/* can do aref=ground */
+#define SDF_COMMON	0x00200000	/* can do aref=common */
+#define SDF_DIFF	0x00400000	/* can do aref=diff */
+#define SDF_OTHER	0x00800000	/* can do aref=other */
+#define SDF_DITHER	0x01000000	/* can do dithering */
+#define SDF_DEGLITCH	0x02000000	/* can do deglitching */
+#define SDF_MMAP	0x04000000	/* can do mmap() */
+#define SDF_RUNNING	0x08000000	/* subdevice is acquiring data */
+#define SDF_LSAMPL	0x10000000	/* subdevice uses 32-bit samples */
+#define SDF_PACKED	0x20000000	/* subdevice can do packed DIO */
+
+/* subdevice types */
+
+/**
+ * enum comedi_subdevice_type - COMEDI subdevice types
+ * @COMEDI_SUBD_UNUSED:		Unused subdevice.
+ * @COMEDI_SUBD_AI:		Analog input.
+ * @COMEDI_SUBD_AO:		Analog output.
+ * @COMEDI_SUBD_DI:		Digital input.
+ * @COMEDI_SUBD_DO:		Digital output.
+ * @COMEDI_SUBD_DIO:		Digital input/output.
+ * @COMEDI_SUBD_COUNTER:	Counter.
+ * @COMEDI_SUBD_TIMER:		Timer.
+ * @COMEDI_SUBD_MEMORY:		Memory, EEPROM, DPRAM.
+ * @COMEDI_SUBD_CALIB:		Calibration DACs.
+ * @COMEDI_SUBD_PROC:		Processor, DSP.
+ * @COMEDI_SUBD_SERIAL:		Serial I/O.
+ * @COMEDI_SUBD_PWM:		Pulse-Width Modulation output.
+ */
+enum comedi_subdevice_type {
+	COMEDI_SUBD_UNUSED,
+	COMEDI_SUBD_AI,
+	COMEDI_SUBD_AO,
+	COMEDI_SUBD_DI,
+	COMEDI_SUBD_DO,
+	COMEDI_SUBD_DIO,
+	COMEDI_SUBD_COUNTER,
+	COMEDI_SUBD_TIMER,
+	COMEDI_SUBD_MEMORY,
+	COMEDI_SUBD_CALIB,
+	COMEDI_SUBD_PROC,
+	COMEDI_SUBD_SERIAL,
+	COMEDI_SUBD_PWM
+};
+
+/* configuration instructions */
+
+/**
+ * enum comedi_io_direction - COMEDI I/O directions
+ * @COMEDI_INPUT:	Input.
+ * @COMEDI_OUTPUT:	Output.
+ * @COMEDI_OPENDRAIN:	Open-drain (or open-collector) output.
+ *
+ * These are used by the %INSN_CONFIG_DIO_QUERY configuration instruction to
+ * report a direction.  They may also be used in other places where a direction
+ * needs to be specified.
+ */
+enum comedi_io_direction {
+	COMEDI_INPUT = 0,
+	COMEDI_OUTPUT = 1,
+	COMEDI_OPENDRAIN = 2
+};
+
+/**
+ * enum configuration_ids - COMEDI configuration instruction codes
+ * @INSN_CONFIG_DIO_INPUT:	Configure digital I/O as input.
+ * @INSN_CONFIG_DIO_OUTPUT:	Configure digital I/O as output.
+ * @INSN_CONFIG_DIO_OPENDRAIN:	Configure digital I/O as open-drain (or open
+ *				collector) output.
+ * @INSN_CONFIG_ANALOG_TRIG:	Configure analog trigger.
+ * @INSN_CONFIG_ALT_SOURCE:	Configure alternate input source.
+ * @INSN_CONFIG_DIGITAL_TRIG:	Configure digital trigger.
+ * @INSN_CONFIG_BLOCK_SIZE:	Configure block size for DMA transfers.
+ * @INSN_CONFIG_TIMER_1:	Configure divisor for external clock.
+ * @INSN_CONFIG_FILTER:		Configure a filter.
+ * @INSN_CONFIG_CHANGE_NOTIFY:	Configure change notification for digital
+ *				inputs.  (New drivers should use
+ *				%INSN_CONFIG_DIGITAL_TRIG instead.)
+ * @INSN_CONFIG_SERIAL_CLOCK:	Configure clock for serial I/O.
+ * @INSN_CONFIG_BIDIRECTIONAL_DATA: Send and receive byte over serial I/O.
+ * @INSN_CONFIG_DIO_QUERY:	Query direction of digital I/O channel.
+ * @INSN_CONFIG_PWM_OUTPUT:	Configure pulse-width modulator output.
+ * @INSN_CONFIG_GET_PWM_OUTPUT:	Get pulse-width modulator output configuration.
+ * @INSN_CONFIG_ARM:		Arm a subdevice or channel.
+ * @INSN_CONFIG_DISARM:		Disarm a subdevice or channel.
+ * @INSN_CONFIG_GET_COUNTER_STATUS: Get counter status.
+ * @INSN_CONFIG_RESET:		Reset a subdevice or channel.
+ * @INSN_CONFIG_GPCT_SINGLE_PULSE_GENERATOR: Configure counter/timer as
+ *				single pulse generator.
+ * @INSN_CONFIG_GPCT_PULSE_TRAIN_GENERATOR: Configure counter/timer as
+ *				pulse train generator.
+ * @INSN_CONFIG_GPCT_QUADRATURE_ENCODER: Configure counter as a quadrature
+ *				encoder.
+ * @INSN_CONFIG_SET_GATE_SRC:	Set counter/timer gate source.
+ * @INSN_CONFIG_GET_GATE_SRC:	Get counter/timer gate source.
+ * @INSN_CONFIG_SET_CLOCK_SRC:	Set counter/timer master clock source.
+ * @INSN_CONFIG_GET_CLOCK_SRC:	Get counter/timer master clock source.
+ * @INSN_CONFIG_SET_OTHER_SRC:	Set counter/timer "other" source.
+ * @INSN_CONFIG_GET_HARDWARE_BUFFER_SIZE: Get size (in bytes) of subdevice's
+ *				on-board FIFOs used during streaming
+ *				input/output.
+ * @INSN_CONFIG_SET_COUNTER_MODE: Set counter/timer mode.
+ * @INSN_CONFIG_8254_SET_MODE:	(Deprecated) Same as
+ *				%INSN_CONFIG_SET_COUNTER_MODE.
+ * @INSN_CONFIG_8254_READ_STATUS: Read status of 8254 counter channel.
+ * @INSN_CONFIG_SET_ROUTING:	Set routing for a channel.
+ * @INSN_CONFIG_GET_ROUTING:	Get routing for a channel.
+ * @INSN_CONFIG_PWM_SET_PERIOD: Set PWM period in nanoseconds.
+ * @INSN_CONFIG_PWM_GET_PERIOD: Get PWM period in nanoseconds.
+ * @INSN_CONFIG_GET_PWM_STATUS: Get PWM status.
+ * @INSN_CONFIG_PWM_SET_H_BRIDGE: Set PWM H bridge duty cycle and polarity for
+ *				a relay simultaneously.
+ * @INSN_CONFIG_PWM_GET_H_BRIDGE: Get PWM H bridge duty cycle and polarity.
+ * @INSN_CONFIG_GET_CMD_TIMING_CONSTRAINTS: Get the hardware timing restraints,
+ *				regardless of trigger sources.
+ */
+enum configuration_ids {
+	INSN_CONFIG_DIO_INPUT = COMEDI_INPUT,
+	INSN_CONFIG_DIO_OUTPUT = COMEDI_OUTPUT,
+	INSN_CONFIG_DIO_OPENDRAIN = COMEDI_OPENDRAIN,
+	INSN_CONFIG_ANALOG_TRIG = 16,
+/*	INSN_CONFIG_WAVEFORM = 17, */
+/*	INSN_CONFIG_TRIG = 18, */
+/*	INSN_CONFIG_COUNTER = 19, */
+	INSN_CONFIG_ALT_SOURCE = 20,
+	INSN_CONFIG_DIGITAL_TRIG = 21,
+	INSN_CONFIG_BLOCK_SIZE = 22,
+	INSN_CONFIG_TIMER_1 = 23,
+	INSN_CONFIG_FILTER = 24,
+	INSN_CONFIG_CHANGE_NOTIFY = 25,
+
+	INSN_CONFIG_SERIAL_CLOCK = 26,	/*ALPHA*/
+	INSN_CONFIG_BIDIRECTIONAL_DATA = 27,
+	INSN_CONFIG_DIO_QUERY = 28,
+	INSN_CONFIG_PWM_OUTPUT = 29,
+	INSN_CONFIG_GET_PWM_OUTPUT = 30,
+	INSN_CONFIG_ARM = 31,
+	INSN_CONFIG_DISARM = 32,
+	INSN_CONFIG_GET_COUNTER_STATUS = 33,
+	INSN_CONFIG_RESET = 34,
+	INSN_CONFIG_GPCT_SINGLE_PULSE_GENERATOR = 1001,
+	INSN_CONFIG_GPCT_PULSE_TRAIN_GENERATOR = 1002,
+	INSN_CONFIG_GPCT_QUADRATURE_ENCODER = 1003,
+	INSN_CONFIG_SET_GATE_SRC = 2001,
+	INSN_CONFIG_GET_GATE_SRC = 2002,
+	INSN_CONFIG_SET_CLOCK_SRC = 2003,
+	INSN_CONFIG_GET_CLOCK_SRC = 2004,
+	INSN_CONFIG_SET_OTHER_SRC = 2005,
+	INSN_CONFIG_GET_HARDWARE_BUFFER_SIZE = 2006,
+	INSN_CONFIG_SET_COUNTER_MODE = 4097,
+	INSN_CONFIG_8254_SET_MODE = INSN_CONFIG_SET_COUNTER_MODE,
+	INSN_CONFIG_8254_READ_STATUS = 4098,
+	INSN_CONFIG_SET_ROUTING = 4099,
+	INSN_CONFIG_GET_ROUTING = 4109,
+	INSN_CONFIG_PWM_SET_PERIOD = 5000,
+	INSN_CONFIG_PWM_GET_PERIOD = 5001,
+	INSN_CONFIG_GET_PWM_STATUS = 5002,
+	INSN_CONFIG_PWM_SET_H_BRIDGE = 5003,
+	INSN_CONFIG_PWM_GET_H_BRIDGE = 5004,
+	INSN_CONFIG_GET_CMD_TIMING_CONSTRAINTS = 5005,
+};
+
+/**
+ * enum device_configuration_ids - COMEDI configuration instruction codes global
+ * to an entire device.
+ * @INSN_DEVICE_CONFIG_TEST_ROUTE:	Validate the possibility of a
+ *					globally-named route
+ * @INSN_DEVICE_CONFIG_CONNECT_ROUTE:	Connect a globally-named route
+ * @INSN_DEVICE_CONFIG_DISCONNECT_ROUTE:Disconnect a globally-named route
+ * @INSN_DEVICE_CONFIG_GET_ROUTES:	Get a list of all globally-named routes
+ *					that are valid for a particular device.
+ */
+enum device_config_route_ids {
+	INSN_DEVICE_CONFIG_TEST_ROUTE = 0,
+	INSN_DEVICE_CONFIG_CONNECT_ROUTE = 1,
+	INSN_DEVICE_CONFIG_DISCONNECT_ROUTE = 2,
+	INSN_DEVICE_CONFIG_GET_ROUTES = 3,
+};
+
+/**
+ * enum comedi_digital_trig_op - operations for configuring a digital trigger
+ * @COMEDI_DIGITAL_TRIG_DISABLE:	Return digital trigger to its default,
+ *					inactive, unconfigured state.
+ * @COMEDI_DIGITAL_TRIG_ENABLE_EDGES:	Set rising and/or falling edge inputs
+ *					that each can fire the trigger.
+ * @COMEDI_DIGITAL_TRIG_ENABLE_LEVELS:	Set a combination of high and/or low
+ *					level inputs that can fire the trigger.
+ *
+ * These are used with the %INSN_CONFIG_DIGITAL_TRIG configuration instruction.
+ * The data for the configuration instruction is as follows...
+ *
+ *   data[%0] = %INSN_CONFIG_DIGITAL_TRIG
+ *
+ *   data[%1] = trigger ID
+ *
+ *   data[%2] = configuration operation
+ *
+ *   data[%3] = configuration parameter 1
+ *
+ *   data[%4] = configuration parameter 2
+ *
+ *   data[%5] = configuration parameter 3
+ *
+ * The trigger ID (data[%1]) is used to differentiate multiple digital triggers
+ * belonging to the same subdevice.  The configuration operation (data[%2]) is
+ * one of the enum comedi_digital_trig_op values.  The configuration
+ * parameters (data[%3], data[%4], and data[%5]) depend on the operation; they
+ * are not used with %COMEDI_DIGITAL_TRIG_DISABLE.
+ *
+ * For %COMEDI_DIGITAL_TRIG_ENABLE_EDGES and %COMEDI_DIGITAL_TRIG_ENABLE_LEVELS,
+ * configuration parameter 1 (data[%3]) contains a "left-shift" value that
+ * specifies the input corresponding to bit 0 of configuration parameters 2
+ * and 3.  This is useful if the trigger has more than 32 inputs.
+ *
+ * For %COMEDI_DIGITAL_TRIG_ENABLE_EDGES, configuration parameter 2 (data[%4])
+ * specifies which of up to 32 inputs have rising-edge sensitivity, and
+ * configuration parameter 3 (data[%5]) specifies which of up to 32 inputs
+ * have falling-edge sensitivity that can fire the trigger.
+ *
+ * For %COMEDI_DIGITAL_TRIG_ENABLE_LEVELS, configuration parameter 2 (data[%4])
+ * specifies which of up to 32 inputs must be at a high level, and
+ * configuration parameter 3 (data[%5]) specifies which of up to 32 inputs
+ * must be at a low level for the trigger to fire.
+ *
+ * Some sequences of %INSN_CONFIG_DIGITAL_TRIG instructions may have a (partly)
+ * accumulative effect, depending on the low-level driver.  This is useful
+ * when setting up a trigger that has more than 32 inputs, or has a combination
+ * of edge- and level-triggered inputs.
+ */
+enum comedi_digital_trig_op {
+	COMEDI_DIGITAL_TRIG_DISABLE = 0,
+	COMEDI_DIGITAL_TRIG_ENABLE_EDGES = 1,
+	COMEDI_DIGITAL_TRIG_ENABLE_LEVELS = 2
+};
+
+/**
+ * enum comedi_support_level - support level for a COMEDI feature
+ * @COMEDI_UNKNOWN_SUPPORT:	Unspecified support for feature.
+ * @COMEDI_SUPPORTED:		Feature is supported.
+ * @COMEDI_UNSUPPORTED:		Feature is unsupported.
+ */
+enum comedi_support_level {
+	COMEDI_UNKNOWN_SUPPORT = 0,
+	COMEDI_SUPPORTED,
+	COMEDI_UNSUPPORTED
+};
+
+/**
+ * enum comedi_counter_status_flags - counter status bits
+ * @COMEDI_COUNTER_ARMED:		Counter is armed.
+ * @COMEDI_COUNTER_COUNTING:		Counter is counting.
+ * @COMEDI_COUNTER_TERMINAL_COUNT:	Counter reached terminal count.
+ *
+ * These bitwise values are used by the %INSN_CONFIG_GET_COUNTER_STATUS
+ * configuration instruction to report the status of a counter.
+ */
+enum comedi_counter_status_flags {
+	COMEDI_COUNTER_ARMED = 0x1,
+	COMEDI_COUNTER_COUNTING = 0x2,
+	COMEDI_COUNTER_TERMINAL_COUNT = 0x4,
+};
+
+/* ioctls */
+
+#define CIO 'd'
+#define COMEDI_DEVCONFIG _IOW(CIO, 0, struct comedi_devconfig)
+#define COMEDI_DEVINFO _IOR(CIO, 1, struct comedi_devinfo)
+#define COMEDI_SUBDINFO _IOR(CIO, 2, struct comedi_subdinfo)
+#define COMEDI_CHANINFO _IOR(CIO, 3, struct comedi_chaninfo)
+/* _IOWR(CIO, 4, ...) is reserved */
+#define COMEDI_LOCK _IO(CIO, 5)
+#define COMEDI_UNLOCK _IO(CIO, 6)
+#define COMEDI_CANCEL _IO(CIO, 7)
+#define COMEDI_RANGEINFO _IOR(CIO, 8, struct comedi_rangeinfo)
+#define COMEDI_CMD _IOR(CIO, 9, struct comedi_cmd)
+#define COMEDI_CMDTEST _IOR(CIO, 10, struct comedi_cmd)
+#define COMEDI_INSNLIST _IOR(CIO, 11, struct comedi_insnlist)
+#define COMEDI_INSN _IOR(CIO, 12, struct comedi_insn)
+#define COMEDI_BUFCONFIG _IOR(CIO, 13, struct comedi_bufconfig)
+#define COMEDI_BUFINFO _IOWR(CIO, 14, struct comedi_bufinfo)
+#define COMEDI_POLL _IO(CIO, 15)
+#define COMEDI_SETRSUBD _IO(CIO, 16)
+#define COMEDI_SETWSUBD _IO(CIO, 17)
+
+/* structures */
+
+/**
+ * struct comedi_insn - COMEDI instruction
+ * @insn:	COMEDI instruction type (%INSN_xxx).
+ * @n:		Length of @data[].
+ * @data:	Pointer to data array operated on by the instruction.
+ * @subdev:	Subdevice index.
+ * @chanspec:	A packed "chanspec" value consisting of channel number,
+ *		analog range index, analog reference type, and flags.
+ * @unused:	Reserved for future use.
+ *
+ * This is used with the %COMEDI_INSN ioctl, and indirectly with the
+ * %COMEDI_INSNLIST ioctl.
+ */
+struct comedi_insn {
+	unsigned int insn;
+	unsigned int n;
+	unsigned int __user *data;
+	unsigned int subdev;
+	unsigned int chanspec;
+	unsigned int unused[3];
+};
+
+/**
+ * struct comedi_insnlist - list of COMEDI instructions
+ * @n_insns:	Number of COMEDI instructions.
+ * @insns:	Pointer to array COMEDI instructions.
+ *
+ * This is used with the %COMEDI_INSNLIST ioctl.
+ */
+struct comedi_insnlist {
+	unsigned int n_insns;
+	struct comedi_insn __user *insns;
+};
+
+/**
+ * struct comedi_cmd - COMEDI asynchronous acquisition command details
+ * @subdev:		Subdevice index.
+ * @flags:		Command flags (%CMDF_xxx).
+ * @start_src:		"Start acquisition" trigger source (%TRIG_xxx).
+ * @start_arg:		"Start acquisition" trigger argument.
+ * @scan_begin_src:	"Scan begin" trigger source.
+ * @scan_begin_arg:	"Scan begin" trigger argument.
+ * @convert_src:	"Convert" trigger source.
+ * @convert_arg:	"Convert" trigger argument.
+ * @scan_end_src:	"Scan end" trigger source.
+ * @scan_end_arg:	"Scan end" trigger argument.
+ * @stop_src:		"Stop acquisition" trigger source.
+ * @stop_arg:		"Stop acquisition" trigger argument.
+ * @chanlist:		Pointer to array of "chanspec" values, containing a
+ *			sequence of channel numbers packed with analog range
+ *			index, etc.
+ * @chanlist_len:	Number of channels in sequence.
+ * @data:		Pointer to miscellaneous set-up data (not used).
+ * @data_len:		Length of miscellaneous set-up data.
+ *
+ * This is used with the %COMEDI_CMD or %COMEDI_CMDTEST ioctl to set-up
+ * or validate an asynchronous acquisition command.  The ioctl may modify
+ * the &struct comedi_cmd and copy it back to the caller.
+ *
+ * Optional command @flags values that can be ORed together...
+ *
+ * %CMDF_BOGUS - makes %COMEDI_CMD ioctl return error %EAGAIN instead of
+ * starting the command.
+ *
+ * %CMDF_PRIORITY - requests "hard real-time" processing (which is not
+ * supported in this version of COMEDI).
+ *
+ * %CMDF_WAKE_EOS - requests the command makes data available for reading
+ * after every "scan" period.
+ *
+ * %CMDF_WRITE - marks the command as being in the "write" (to device)
+ * direction.  This does not need to be specified by the caller unless the
+ * subdevice supports commands in either direction.
+ *
+ * %CMDF_RAWDATA - prevents the command from "munging" the data between the
+ * COMEDI sample format and the raw hardware sample format.
+ *
+ * %CMDF_ROUND_NEAREST - requests timing periods to be rounded to nearest
+ * supported values.
+ *
+ * %CMDF_ROUND_DOWN - requests timing periods to be rounded down to supported
+ * values (frequencies rounded up).
+ *
+ * %CMDF_ROUND_UP - requests timing periods to be rounded up to supported
+ * values (frequencies rounded down).
+ *
+ * Trigger source values for @start_src, @scan_begin_src, @convert_src,
+ * @scan_end_src, and @stop_src...
+ *
+ * %TRIG_ANY - "all ones" value used to test which trigger sources are
+ * supported.
+ *
+ * %TRIG_INVALID - "all zeroes" value used to indicate that all requested
+ * trigger sources are invalid.
+ *
+ * %TRIG_NONE - never trigger (often used as a @stop_src value).
+ *
+ * %TRIG_NOW - trigger after '_arg' nanoseconds.
+ *
+ * %TRIG_FOLLOW - trigger follows another event.
+ *
+ * %TRIG_TIMER - trigger every '_arg' nanoseconds.
+ *
+ * %TRIG_COUNT - trigger when count '_arg' is reached.
+ *
+ * %TRIG_EXT - trigger on external signal specified by '_arg'.
+ *
+ * %TRIG_INT - trigger on internal, software trigger specified by '_arg'.
+ *
+ * %TRIG_OTHER - trigger on other, driver-defined signal specified by '_arg'.
+ */
+struct comedi_cmd {
+	unsigned int subdev;
+	unsigned int flags;
+
+	unsigned int start_src;
+	unsigned int start_arg;
+
+	unsigned int scan_begin_src;
+	unsigned int scan_begin_arg;
+
+	unsigned int convert_src;
+	unsigned int convert_arg;
+
+	unsigned int scan_end_src;
+	unsigned int scan_end_arg;
+
+	unsigned int stop_src;
+	unsigned int stop_arg;
+
+	unsigned int *chanlist;
+	unsigned int chanlist_len;
+
+	short __user *data;
+	unsigned int data_len;
+};
+
+/**
+ * struct comedi_chaninfo - used to retrieve per-channel information
+ * @subdev:		Subdevice index.
+ * @maxdata_list:	Optional pointer to per-channel maximum data values.
+ * @flaglist:		Optional pointer to per-channel flags.
+ * @rangelist:		Optional pointer to per-channel range types.
+ * @unused:		Reserved for future use.
+ *
+ * This is used with the %COMEDI_CHANINFO ioctl to get per-channel information
+ * for the subdevice.  Use of this requires knowledge of the number of channels
+ * and subdevice flags obtained using the %COMEDI_SUBDINFO ioctl.
+ *
+ * The @maxdata_list member must be %NULL unless the %SDF_MAXDATA subdevice
+ * flag is set.  The @flaglist member must be %NULL unless the %SDF_FLAGS
+ * subdevice flag is set.  The @rangelist member must be %NULL unless the
+ * %SDF_RANGETYPE subdevice flag is set.  Otherwise, the arrays they point to
+ * must be at least as long as the number of channels.
+ */
+struct comedi_chaninfo {
+	unsigned int subdev;
+	unsigned int __user *maxdata_list;
+	unsigned int __user *flaglist;
+	unsigned int __user *rangelist;
+	unsigned int unused[4];
+};
+
+/**
+ * struct comedi_rangeinfo - used to retrieve the range table for a channel
+ * @range_type:		Encodes subdevice index (bits 27:24), channel index
+ *			(bits 23:16) and range table length (bits 15:0).
+ * @range_ptr:		Pointer to array of @struct comedi_krange to be filled
+ *			in with the range table for the channel or subdevice.
+ *
+ * This is used with the %COMEDI_RANGEINFO ioctl to retrieve the range table
+ * for a specific channel (if the subdevice has the %SDF_RANGETYPE flag set to
+ * indicate that the range table depends on the channel), or for the subdevice
+ * as a whole (if the %SDF_RANGETYPE flag is clear, indicating the range table
+ * is shared by all channels).
+ *
+ * The @range_type value is an input to the ioctl and comes from a previous
+ * use of the %COMEDI_SUBDINFO ioctl (if the %SDF_RANGETYPE flag is clear),
+ * or the %COMEDI_CHANINFO ioctl (if the %SDF_RANGETYPE flag is set).
+ */
+struct comedi_rangeinfo {
+	unsigned int range_type;
+	void __user *range_ptr;
+};
+
+/**
+ * struct comedi_krange - describes a range in a range table
+ * @min:	Minimum value in millionths (1e-6) of a unit.
+ * @max:	Maximum value in millionths (1e-6) of a unit.
+ * @flags:	Indicates the units (in bits 7:0) OR'ed with optional flags.
+ *
+ * A range table is associated with a single channel, or with all channels in a
+ * subdevice, and a list of one or more ranges.  A %struct comedi_krange
+ * describes the physical range of units for one of those ranges.  Sample
+ * values in COMEDI are unsigned from %0 up to some 'maxdata' value.  The
+ * mapping from sample values to physical units is assumed to be nomimally
+ * linear (for the purpose of describing the range), with sample value %0
+ * mapping to @min, and the 'maxdata' sample value mapping to @max.
+ *
+ * The currently defined units are %UNIT_volt (%0), %UNIT_mA (%1), and
+ * %UNIT_none (%2).  The @min and @max values are the physical range multiplied
+ * by 1e6, so a @max value of %1000000 (with %UNIT_volt) represents a maximal
+ * value of 1 volt.
+ *
+ * The only defined flag value is %RF_EXTERNAL (%0x100), indicating that the
+ * range needs to be multiplied by an external reference.
+ */
+struct comedi_krange {
+	int min;
+	int max;
+	unsigned int flags;
+};
+
+/**
+ * struct comedi_subdinfo - used to retrieve information about a subdevice
+ * @type:		Type of subdevice from &enum comedi_subdevice_type.
+ * @n_chan:		Number of channels the subdevice supports.
+ * @subd_flags:		A mixture of static and dynamic flags describing
+ *			aspects of the subdevice and its current state.
+ * @timer_type:		Timer type.  Always set to %5 ("nanosecond timer").
+ * @len_chanlist:	Maximum length of a channel list if the subdevice
+ *			supports asynchronous acquisition commands.
+ * @maxdata:		Maximum sample value for all channels if the
+ *			%SDF_MAXDATA subdevice flag is clear.
+ * @flags:		Channel flags for all channels if the %SDF_FLAGS
+ *			subdevice flag is clear.
+ * @range_type:		The range type for all channels if the %SDF_RANGETYPE
+ *			subdevice flag is clear.  Encodes the subdevice index
+ *			(bits 27:24), a dummy channel index %0 (bits 23:16),
+ *			and the range table length (bits 15:0).
+ * @settling_time_0:	Not used.
+ * @insn_bits_support:	Set to %COMEDI_SUPPORTED if the subdevice supports the
+ *			%INSN_BITS instruction, or to %COMEDI_UNSUPPORTED if it
+ *			does not.
+ * @unused:		Reserved for future use.
+ *
+ * This is used with the %COMEDI_SUBDINFO ioctl which copies an array of
+ * &struct comedi_subdinfo back to user space, with one element per subdevice.
+ * Use of this requires knowledge of the number of subdevices obtained from
+ * the %COMEDI_DEVINFO ioctl.
+ *
+ * These are the @subd_flags values that may be ORed together...
+ *
+ * %SDF_BUSY - the subdevice is busy processing an asynchronous command or a
+ * synchronous instruction.
+ *
+ * %SDF_BUSY_OWNER - the subdevice is busy processing an asynchronous
+ * acquisition command started on the current file object (the file object
+ * issuing the %COMEDI_SUBDINFO ioctl).
+ *
+ * %SDF_LOCKED - the subdevice is locked by a %COMEDI_LOCK ioctl.
+ *
+ * %SDF_LOCK_OWNER - the subdevice is locked by a %COMEDI_LOCK ioctl from the
+ * current file object.
+ *
+ * %SDF_MAXDATA - maximum sample values are channel-specific.
+ *
+ * %SDF_FLAGS - channel flags are channel-specific.
+ *
+ * %SDF_RANGETYPE - range types are channel-specific.
+ *
+ * %SDF_PWM_COUNTER - PWM can switch off automatically.
+ *
+ * %SDF_PWM_HBRIDGE - or PWM is signed (H-bridge).
+ *
+ * %SDF_CMD - the subdevice supports asynchronous commands.
+ *
+ * %SDF_SOFT_CALIBRATED - the subdevice uses software calibration.
+ *
+ * %SDF_CMD_WRITE - the subdevice supports asynchronous commands in the output
+ * ("write") direction.
+ *
+ * %SDF_CMD_READ - the subdevice supports asynchronous commands in the input
+ * ("read") direction.
+ *
+ * %SDF_READABLE - the subdevice is readable (e.g. analog input).
+ *
+ * %SDF_WRITABLE (aliased as %SDF_WRITEABLE) - the subdevice is writable (e.g.
+ * analog output).
+ *
+ * %SDF_INTERNAL - the subdevice has no externally visible lines.
+ *
+ * %SDF_GROUND - the subdevice can use ground as an analog reference.
+ *
+ * %SDF_COMMON - the subdevice can use a common analog reference.
+ *
+ * %SDF_DIFF - the subdevice can use differential inputs (or outputs).
+ *
+ * %SDF_OTHER - the subdevice can use some other analog reference.
+ *
+ * %SDF_DITHER - the subdevice can do dithering.
+ *
+ * %SDF_DEGLITCH - the subdevice can do deglitching.
+ *
+ * %SDF_MMAP - this is never set.
+ *
+ * %SDF_RUNNING - an asynchronous command is still running.
+ *
+ * %SDF_LSAMPL - the subdevice uses "long" (32-bit) samples (for asynchronous
+ * command data).
+ *
+ * %SDF_PACKED - the subdevice packs several DIO samples into a single sample
+ * (for asynchronous command data).
+ *
+ * No "channel flags" (@flags) values are currently defined.
+ */
+struct comedi_subdinfo {
+	unsigned int type;
+	unsigned int n_chan;
+	unsigned int subd_flags;
+	unsigned int timer_type;
+	unsigned int len_chanlist;
+	unsigned int maxdata;
+	unsigned int flags;
+	unsigned int range_type;
+	unsigned int settling_time_0;
+	unsigned int insn_bits_support;
+	unsigned int unused[8];
+};
+
+/**
+ * struct comedi_devinfo - used to retrieve information about a COMEDI device
+ * @version_code:	COMEDI version code.
+ * @n_subdevs:		Number of subdevices the device has.
+ * @driver_name:	Null-terminated COMEDI driver name.
+ * @board_name:		Null-terminated COMEDI board name.
+ * @read_subdevice:	Index of the current "read" subdevice (%-1 if none).
+ * @write_subdevice:	Index of the current "write" subdevice (%-1 if none).
+ * @unused:		Reserved for future use.
+ *
+ * This is used with the %COMEDI_DEVINFO ioctl to get basic information about
+ * the device.
+ */
+struct comedi_devinfo {
+	unsigned int version_code;
+	unsigned int n_subdevs;
+	char driver_name[COMEDI_NAMELEN];
+	char board_name[COMEDI_NAMELEN];
+	int read_subdevice;
+	int write_subdevice;
+	int unused[30];
+};
+
+/**
+ * struct comedi_devconfig - used to configure a legacy COMEDI device
+ * @board_name:		Null-terminated string specifying the type of board
+ *			to configure.
+ * @options:		An array of integer configuration options.
+ *
+ * This is used with the %COMEDI_DEVCONFIG ioctl to configure a "legacy" COMEDI
+ * device, such as an ISA card.  Not all COMEDI drivers support this.  Those
+ * that do either expect the specified board name to match one of a list of
+ * names registered with the COMEDI core, or expect the specified board name
+ * to match the COMEDI driver name itself.  The configuration options are
+ * handled in a driver-specific manner.
+ */
+struct comedi_devconfig {
+	char board_name[COMEDI_NAMELEN];
+	int options[COMEDI_NDEVCONFOPTS];
+};
+
+/**
+ * struct comedi_bufconfig - used to set or get buffer size for a subdevice
+ * @subdevice:		Subdevice index.
+ * @flags:		Not used.
+ * @maximum_size:	Maximum allowed buffer size.
+ * @size:		Buffer size.
+ * @unused:		Reserved for future use.
+ *
+ * This is used with the %COMEDI_BUFCONFIG ioctl to get or configure the
+ * maximum buffer size and current buffer size for a COMEDI subdevice that
+ * supports asynchronous commands.  If the subdevice does not support
+ * asynchronous commands, @maximum_size and @size are ignored and set to 0.
+ *
+ * On ioctl input, non-zero values of @maximum_size and @size specify a
+ * new maximum size and new current size (in bytes), respectively.  These
+ * will by rounded up to a multiple of %PAGE_SIZE.  Specifying a new maximum
+ * size requires admin capabilities.
+ *
+ * On ioctl output, @maximum_size and @size and set to the current maximum
+ * buffer size and current buffer size, respectively.
+ */
+struct comedi_bufconfig {
+	unsigned int subdevice;
+	unsigned int flags;
+
+	unsigned int maximum_size;
+	unsigned int size;
+
+	unsigned int unused[4];
+};
+
+/**
+ * struct comedi_bufinfo - used to manipulate buffer position for a subdevice
+ * @subdevice:		Subdevice index.
+ * @bytes_read:		Specify amount to advance read position for an
+ *			asynchronous command in the input ("read") direction.
+ * @buf_write_ptr:	Current write position (index) within the buffer.
+ * @buf_read_ptr:	Current read position (index) within the buffer.
+ * @buf_write_count:	Total amount written, modulo 2^32.
+ * @buf_read_count:	Total amount read, modulo 2^32.
+ * @bytes_written:	Specify amount to advance write position for an
+ *			asynchronous command in the output ("write") direction.
+ * @unused:		Reserved for future use.
+ *
+ * This is used with the %COMEDI_BUFINFO ioctl to optionally advance the
+ * current read or write position in an asynchronous acquisition data buffer,
+ * and to get the current read and write positions in the buffer.
+ */
+struct comedi_bufinfo {
+	unsigned int subdevice;
+	unsigned int bytes_read;
+
+	unsigned int buf_write_ptr;
+	unsigned int buf_read_ptr;
+	unsigned int buf_write_count;
+	unsigned int buf_read_count;
+
+	unsigned int bytes_written;
+
+	unsigned int unused[4];
+};
+
+/* range stuff */
+
+#define __RANGE(a, b)	((((a) & 0xffff) << 16) | ((b) & 0xffff))
+
+#define RANGE_OFFSET(a)		(((a) >> 16) & 0xffff)
+#define RANGE_LENGTH(b)		((b) & 0xffff)
+
+#define RF_UNIT(flags)		((flags) & 0xff)
+#define RF_EXTERNAL		0x100
+
+#define UNIT_volt		0
+#define UNIT_mA			1
+#define UNIT_none		2
+
+#define COMEDI_MIN_SPEED	0xffffffffu
+
+/**********************************************************/
+/* everything after this line is ALPHA */
+/**********************************************************/
+
+/*
+ * 8254 specific configuration.
+ *
+ * It supports two config commands:
+ *
+ * 0 ID: INSN_CONFIG_SET_COUNTER_MODE
+ * 1 8254 Mode
+ * I8254_MODE0, I8254_MODE1, ..., I8254_MODE5
+ * OR'ed with:
+ * I8254_BCD, I8254_BINARY
+ *
+ * 0 ID: INSN_CONFIG_8254_READ_STATUS
+ * 1 <-- Status byte returned here.
+ * B7 = Output
+ * B6 = NULL Count
+ * B5 - B0 Current mode.
+ */
+
+enum i8254_mode {
+	I8254_MODE0 = (0 << 1),	/* Interrupt on terminal count */
+	I8254_MODE1 = (1 << 1),	/* Hardware retriggerable one-shot */
+	I8254_MODE2 = (2 << 1),	/* Rate generator */
+	I8254_MODE3 = (3 << 1),	/* Square wave mode */
+	I8254_MODE4 = (4 << 1),	/* Software triggered strobe */
+	/* Hardware triggered strobe (retriggerable) */
+	I8254_MODE5 = (5 << 1),
+	/* Use binary-coded decimal instead of binary (pretty useless) */
+	I8254_BCD = 1,
+	I8254_BINARY = 0
+};
+
+/* *** BEGIN GLOBALLY-NAMED NI TERMINALS/SIGNALS *** */
+
+/*
+ * Common National Instruments Terminal/Signal names.
+ * Some of these have no NI_ prefix as they are useful for non-NI hardware, such
+ * as those that utilize the PXI/RTSI trigger lines.
+ *
+ * NOTE ABOUT THE CHOICE OF NAMES HERE AND THE CAMELSCRIPT:
+ *   The choice to use CamelScript and the exact names below is for
+ *   maintainability, clarity, similarity to manufacturer's documentation,
+ *   _and_ a mitigation for confusion that has plagued the use of these drivers
+ *   for years!
+ *
+ *   More detail:
+ *   There have been significant confusions over the past many years for users
+ *   when trying to understand how to connect to/from signals and terminals on
+ *   NI hardware using comedi.  The major reason for this is that the actual
+ *   register values were exposed and required to be used by users.  Several
+ *   major reasons exist why this caused major confusion for users:
+ *   1) The register values are _NOT_ in user documentation, but rather in
+ *     arcane locations, such as a few register programming manuals that are
+ *     increasingly hard to find and the NI MHDDK (comments in example code).
+ *     There is no one place to find the various valid values of the registers.
+ *   2) The register values are _NOT_ completely consistent.  There is no way to
+ *     gain any sense of intuition of which values, or even enums one should use
+ *     for various registers.  There was some attempt in prior use of comedi to
+ *     name enums such that a user might know which enums should be used for
+ *     varying purposes, but the end-user had to gain a knowledge of register
+ *     values to correctly wield this approach.
+ *   3) The names for signals and registers found in the various register level
+ *     programming manuals and vendor-provided documentation are _not_ even
+ *     close to the same names that are in the end-user documentation.
+ *
+ *   Similar, albeit less, confusion plagued NI's previous version of their own
+ *   drivers.  Earlier than 2003, NI greatly simplified the situation for users
+ *   by releasing a new API that abstracted the names of signals/terminals to a
+ *   common and intuitive set of names.
+ *
+ *   The names below mirror the names chosen and well documented by NI.  These
+ *   names are exposed to the user via the comedilib user library.  By keeping
+ *   the names below, in spite of the use of CamelScript, maintenance will be
+ *   greatly eased and confusion for users _and_ comedi developers will be
+ *   greatly reduced.
+ */
+
+/*
+ * Base of abstracted NI names.
+ * The first 16 bits of *_arg are reserved for channel selection.
+ * Since we only actually need the first 4 or 5 bits for all register values on
+ * NI select registers anyways, we'll identify all values >= (1<<15) as being an
+ * abstracted NI signal/terminal name.
+ * These values are also used/returned by INSN_DEVICE_CONFIG_TEST_ROUTE,
+ * INSN_DEVICE_CONFIG_CONNECT_ROUTE, INSN_DEVICE_CONFIG_DISCONNECT_ROUTE,
+ * and INSN_DEVICE_CONFIG_GET_ROUTES.
+ */
+#define NI_NAMES_BASE	0x8000u
+
+#define _TERM_N(base, n, x)	((base) + ((x) & ((n) - 1)))
+
+/*
+ * not necessarily all allowed 64 PFIs are valid--certainly not for all devices
+ */
+#define NI_PFI(x)		_TERM_N(NI_NAMES_BASE, 64, x)
+/* 8 trigger lines by standard, Some devices cannot talk to all eight. */
+#define TRIGGER_LINE(x)		_TERM_N(NI_PFI(-1) + 1, 8, x)
+/* 4 RTSI shared MUXes to route signals to/from TRIGGER_LINES on NI hardware */
+#define NI_RTSI_BRD(x)		_TERM_N(TRIGGER_LINE(-1) + 1, 4, x)
+
+/* *** Counter/timer names : 8 counters max *** */
+#define NI_MAX_COUNTERS		8
+#define NI_COUNTER_NAMES_BASE	(NI_RTSI_BRD(-1)  + 1)
+#define NI_CtrSource(x)	      _TERM_N(NI_COUNTER_NAMES_BASE, NI_MAX_COUNTERS, x)
+/* Gate, Aux, A,B,Z are all treated, at times as gates */
+#define NI_GATES_NAMES_BASE	(NI_CtrSource(-1) + 1)
+#define NI_CtrGate(x)		_TERM_N(NI_GATES_NAMES_BASE, NI_MAX_COUNTERS, x)
+#define NI_CtrAux(x)		_TERM_N(NI_CtrGate(-1)  + 1, NI_MAX_COUNTERS, x)
+#define NI_CtrA(x)		_TERM_N(NI_CtrAux(-1)   + 1, NI_MAX_COUNTERS, x)
+#define NI_CtrB(x)		_TERM_N(NI_CtrA(-1)     + 1, NI_MAX_COUNTERS, x)
+#define NI_CtrZ(x)		_TERM_N(NI_CtrB(-1)     + 1, NI_MAX_COUNTERS, x)
+#define NI_GATES_NAMES_MAX	NI_CtrZ(-1)
+#define NI_CtrArmStartTrigger(x) _TERM_N(NI_CtrZ(-1)    + 1, NI_MAX_COUNTERS, x)
+#define NI_CtrInternalOutput(x) \
+		      _TERM_N(NI_CtrArmStartTrigger(-1) + 1, NI_MAX_COUNTERS, x)
+/** external pin(s) labeled conveniently as Ctr<i>Out. */
+#define NI_CtrOut(x)   _TERM_N(NI_CtrInternalOutput(-1) + 1, NI_MAX_COUNTERS, x)
+/** For Buffered sampling of ctr -- x series capability. */
+#define NI_CtrSampleClock(x)	_TERM_N(NI_CtrOut(-1)   + 1, NI_MAX_COUNTERS, x)
+#define NI_COUNTER_NAMES_MAX	NI_CtrSampleClock(-1)
+
+enum ni_common_signal_names {
+	/* PXI_Star: this is a non-NI-specific signal */
+	PXI_Star = NI_COUNTER_NAMES_MAX + 1,
+	PXI_Clk10,
+	PXIe_Clk100,
+	NI_AI_SampleClock,
+	NI_AI_SampleClockTimebase,
+	NI_AI_StartTrigger,
+	NI_AI_ReferenceTrigger,
+	NI_AI_ConvertClock,
+	NI_AI_ConvertClockTimebase,
+	NI_AI_PauseTrigger,
+	NI_AI_HoldCompleteEvent,
+	NI_AI_HoldComplete,
+	NI_AI_ExternalMUXClock,
+	NI_AI_STOP, /* pulse signal that occurs when a update is finished(?) */
+	NI_AO_SampleClock,
+	NI_AO_SampleClockTimebase,
+	NI_AO_StartTrigger,
+	NI_AO_PauseTrigger,
+	NI_DI_SampleClock,
+	NI_DI_SampleClockTimebase,
+	NI_DI_StartTrigger,
+	NI_DI_ReferenceTrigger,
+	NI_DI_PauseTrigger,
+	NI_DI_InputBufferFull,
+	NI_DI_ReadyForStartEvent,
+	NI_DI_ReadyForTransferEventBurst,
+	NI_DI_ReadyForTransferEventPipelined,
+	NI_DO_SampleClock,
+	NI_DO_SampleClockTimebase,
+	NI_DO_StartTrigger,
+	NI_DO_PauseTrigger,
+	NI_DO_OutputBufferFull,
+	NI_DO_DataActiveEvent,
+	NI_DO_ReadyForStartEvent,
+	NI_DO_ReadyForTransferEvent,
+	NI_MasterTimebase,
+	NI_20MHzTimebase,
+	NI_80MHzTimebase,
+	NI_100MHzTimebase,
+	NI_200MHzTimebase,
+	NI_100kHzTimebase,
+	NI_10MHzRefClock,
+	NI_FrequencyOutput,
+	NI_ChangeDetectionEvent,
+	NI_AnalogComparisonEvent,
+	NI_WatchdogExpiredEvent,
+	NI_WatchdogExpirationTrigger,
+	NI_SCXI_Trig1,
+	NI_LogicLow,
+	NI_LogicHigh,
+	NI_ExternalStrobe,
+	NI_PFI_DO,
+	NI_CaseGround,
+	/* special internal signal used as variable source for RTSI bus: */
+	NI_RGOUT0,
+
+	/* just a name to make the next more convenient, regardless of above */
+	_NI_NAMES_MAX_PLUS_1,
+	NI_NUM_NAMES = _NI_NAMES_MAX_PLUS_1 - NI_NAMES_BASE,
+};
+
+/* *** END GLOBALLY-NAMED NI TERMINALS/SIGNALS *** */
+
+#define NI_USUAL_PFI_SELECT(x)	(((x) < 10) ? (0x1 + (x)) : (0xb + (x)))
+#define NI_USUAL_RTSI_SELECT(x)	(((x) < 7) ? (0xb + (x)) : 0x1b)
+
+/*
+ * mode bits for NI general-purpose counters, set with
+ * INSN_CONFIG_SET_COUNTER_MODE
+ */
+#define NI_GPCT_COUNTING_MODE_SHIFT 16
+#define NI_GPCT_INDEX_PHASE_BITSHIFT 20
+#define NI_GPCT_COUNTING_DIRECTION_SHIFT 24
+enum ni_gpct_mode_bits {
+	NI_GPCT_GATE_ON_BOTH_EDGES_BIT = 0x4,
+	NI_GPCT_EDGE_GATE_MODE_MASK = 0x18,
+	NI_GPCT_EDGE_GATE_STARTS_STOPS_BITS = 0x0,
+	NI_GPCT_EDGE_GATE_STOPS_STARTS_BITS = 0x8,
+	NI_GPCT_EDGE_GATE_STARTS_BITS = 0x10,
+	NI_GPCT_EDGE_GATE_NO_STARTS_NO_STOPS_BITS = 0x18,
+	NI_GPCT_STOP_MODE_MASK = 0x60,
+	NI_GPCT_STOP_ON_GATE_BITS = 0x00,
+	NI_GPCT_STOP_ON_GATE_OR_TC_BITS = 0x20,
+	NI_GPCT_STOP_ON_GATE_OR_SECOND_TC_BITS = 0x40,
+	NI_GPCT_LOAD_B_SELECT_BIT = 0x80,
+	NI_GPCT_OUTPUT_MODE_MASK = 0x300,
+	NI_GPCT_OUTPUT_TC_PULSE_BITS = 0x100,
+	NI_GPCT_OUTPUT_TC_TOGGLE_BITS = 0x200,
+	NI_GPCT_OUTPUT_TC_OR_GATE_TOGGLE_BITS = 0x300,
+	NI_GPCT_HARDWARE_DISARM_MASK = 0xc00,
+	NI_GPCT_NO_HARDWARE_DISARM_BITS = 0x000,
+	NI_GPCT_DISARM_AT_TC_BITS = 0x400,
+	NI_GPCT_DISARM_AT_GATE_BITS = 0x800,
+	NI_GPCT_DISARM_AT_TC_OR_GATE_BITS = 0xc00,
+	NI_GPCT_LOADING_ON_TC_BIT = 0x1000,
+	NI_GPCT_LOADING_ON_GATE_BIT = 0x4000,
+	NI_GPCT_COUNTING_MODE_MASK = 0x7 << NI_GPCT_COUNTING_MODE_SHIFT,
+	NI_GPCT_COUNTING_MODE_NORMAL_BITS =
+		0x0 << NI_GPCT_COUNTING_MODE_SHIFT,
+	NI_GPCT_COUNTING_MODE_QUADRATURE_X1_BITS =
+		0x1 << NI_GPCT_COUNTING_MODE_SHIFT,
+	NI_GPCT_COUNTING_MODE_QUADRATURE_X2_BITS =
+		0x2 << NI_GPCT_COUNTING_MODE_SHIFT,
+	NI_GPCT_COUNTING_MODE_QUADRATURE_X4_BITS =
+		0x3 << NI_GPCT_COUNTING_MODE_SHIFT,
+	NI_GPCT_COUNTING_MODE_TWO_PULSE_BITS =
+		0x4 << NI_GPCT_COUNTING_MODE_SHIFT,
+	NI_GPCT_COUNTING_MODE_SYNC_SOURCE_BITS =
+		0x6 << NI_GPCT_COUNTING_MODE_SHIFT,
+	NI_GPCT_INDEX_PHASE_MASK = 0x3 << NI_GPCT_INDEX_PHASE_BITSHIFT,
+	NI_GPCT_INDEX_PHASE_LOW_A_LOW_B_BITS =
+		0x0 << NI_GPCT_INDEX_PHASE_BITSHIFT,
+	NI_GPCT_INDEX_PHASE_LOW_A_HIGH_B_BITS =
+		0x1 << NI_GPCT_INDEX_PHASE_BITSHIFT,
+	NI_GPCT_INDEX_PHASE_HIGH_A_LOW_B_BITS =
+		0x2 << NI_GPCT_INDEX_PHASE_BITSHIFT,
+	NI_GPCT_INDEX_PHASE_HIGH_A_HIGH_B_BITS =
+		0x3 << NI_GPCT_INDEX_PHASE_BITSHIFT,
+	NI_GPCT_INDEX_ENABLE_BIT = 0x400000,
+	NI_GPCT_COUNTING_DIRECTION_MASK =
+		0x3 << NI_GPCT_COUNTING_DIRECTION_SHIFT,
+	NI_GPCT_COUNTING_DIRECTION_DOWN_BITS =
+		0x00 << NI_GPCT_COUNTING_DIRECTION_SHIFT,
+	NI_GPCT_COUNTING_DIRECTION_UP_BITS =
+		0x1 << NI_GPCT_COUNTING_DIRECTION_SHIFT,
+	NI_GPCT_COUNTING_DIRECTION_HW_UP_DOWN_BITS =
+		0x2 << NI_GPCT_COUNTING_DIRECTION_SHIFT,
+	NI_GPCT_COUNTING_DIRECTION_HW_GATE_BITS =
+		0x3 << NI_GPCT_COUNTING_DIRECTION_SHIFT,
+	NI_GPCT_RELOAD_SOURCE_MASK = 0xc000000,
+	NI_GPCT_RELOAD_SOURCE_FIXED_BITS = 0x0,
+	NI_GPCT_RELOAD_SOURCE_SWITCHING_BITS = 0x4000000,
+	NI_GPCT_RELOAD_SOURCE_GATE_SELECT_BITS = 0x8000000,
+	NI_GPCT_OR_GATE_BIT = 0x10000000,
+	NI_GPCT_INVERT_OUTPUT_BIT = 0x20000000
+};
+
+/*
+ * Bits for setting a clock source with
+ * INSN_CONFIG_SET_CLOCK_SRC when using NI general-purpose counters.
+ */
+enum ni_gpct_clock_source_bits {
+	NI_GPCT_CLOCK_SRC_SELECT_MASK = 0x3f,
+	NI_GPCT_TIMEBASE_1_CLOCK_SRC_BITS = 0x0,
+	NI_GPCT_TIMEBASE_2_CLOCK_SRC_BITS = 0x1,
+	NI_GPCT_TIMEBASE_3_CLOCK_SRC_BITS = 0x2,
+	NI_GPCT_LOGIC_LOW_CLOCK_SRC_BITS = 0x3,
+	NI_GPCT_NEXT_GATE_CLOCK_SRC_BITS = 0x4,
+	NI_GPCT_NEXT_TC_CLOCK_SRC_BITS = 0x5,
+	/* NI 660x-specific */
+	NI_GPCT_SOURCE_PIN_i_CLOCK_SRC_BITS = 0x6,
+	NI_GPCT_PXI10_CLOCK_SRC_BITS = 0x7,
+	NI_GPCT_PXI_STAR_TRIGGER_CLOCK_SRC_BITS = 0x8,
+	NI_GPCT_ANALOG_TRIGGER_OUT_CLOCK_SRC_BITS = 0x9,
+	NI_GPCT_PRESCALE_MODE_CLOCK_SRC_MASK = 0x30000000,
+	NI_GPCT_NO_PRESCALE_CLOCK_SRC_BITS = 0x0,
+	/* divide source by 2 */
+	NI_GPCT_PRESCALE_X2_CLOCK_SRC_BITS = 0x10000000,
+	/* divide source by 8 */
+	NI_GPCT_PRESCALE_X8_CLOCK_SRC_BITS = 0x20000000,
+	NI_GPCT_INVERT_CLOCK_SRC_BIT = 0x80000000
+};
+
+/* NI 660x-specific */
+#define NI_GPCT_SOURCE_PIN_CLOCK_SRC_BITS(x)	(0x10 + (x))
+
+#define NI_GPCT_RTSI_CLOCK_SRC_BITS(x)		(0x18 + (x))
+
+/* no pfi on NI 660x */
+#define NI_GPCT_PFI_CLOCK_SRC_BITS(x)		(0x20 + (x))
+
+/*
+ * Possibilities for setting a gate source with
+ * INSN_CONFIG_SET_GATE_SRC when using NI general-purpose counters.
+ * May be bitwise-or'd with CR_EDGE or CR_INVERT.
+ */
+enum ni_gpct_gate_select {
+	/* m-series gates */
+	NI_GPCT_TIMESTAMP_MUX_GATE_SELECT = 0x0,
+	NI_GPCT_AI_START2_GATE_SELECT = 0x12,
+	NI_GPCT_PXI_STAR_TRIGGER_GATE_SELECT = 0x13,
+	NI_GPCT_NEXT_OUT_GATE_SELECT = 0x14,
+	NI_GPCT_AI_START1_GATE_SELECT = 0x1c,
+	NI_GPCT_NEXT_SOURCE_GATE_SELECT = 0x1d,
+	NI_GPCT_ANALOG_TRIGGER_OUT_GATE_SELECT = 0x1e,
+	NI_GPCT_LOGIC_LOW_GATE_SELECT = 0x1f,
+	/* more gates for 660x */
+	NI_GPCT_SOURCE_PIN_i_GATE_SELECT = 0x100,
+	NI_GPCT_GATE_PIN_i_GATE_SELECT = 0x101,
+	/* more gates for 660x "second gate" */
+	NI_GPCT_UP_DOWN_PIN_i_GATE_SELECT = 0x201,
+	NI_GPCT_SELECTED_GATE_GATE_SELECT = 0x21e,
+	/*
+	 * m-series "second gate" sources are unknown,
+	 * we should add them here with an offset of 0x300 when
+	 * known.
+	 */
+	NI_GPCT_DISABLED_GATE_SELECT = 0x8000,
+};
+
+#define NI_GPCT_GATE_PIN_GATE_SELECT(x)		(0x102 + (x))
+#define NI_GPCT_RTSI_GATE_SELECT(x)		NI_USUAL_RTSI_SELECT(x)
+#define NI_GPCT_PFI_GATE_SELECT(x)		NI_USUAL_PFI_SELECT(x)
+#define NI_GPCT_UP_DOWN_PIN_GATE_SELECT(x)	(0x202 + (x))
+
+/*
+ * Possibilities for setting a source with
+ * INSN_CONFIG_SET_OTHER_SRC when using NI general-purpose counters.
+ */
+enum ni_gpct_other_index {
+	NI_GPCT_SOURCE_ENCODER_A,
+	NI_GPCT_SOURCE_ENCODER_B,
+	NI_GPCT_SOURCE_ENCODER_Z
+};
+
+enum ni_gpct_other_select {
+	/* m-series gates */
+	/* Still unknown, probably only need NI_GPCT_PFI_OTHER_SELECT */
+	NI_GPCT_DISABLED_OTHER_SELECT = 0x8000,
+};
+
+#define NI_GPCT_PFI_OTHER_SELECT(x)	NI_USUAL_PFI_SELECT(x)
+
+/*
+ * start sources for ni general-purpose counters for use with
+ * INSN_CONFIG_ARM
+ */
+enum ni_gpct_arm_source {
+	NI_GPCT_ARM_IMMEDIATE = 0x0,
+	/*
+	 * Start both the counter and the adjacent paired counter simultaneously
+	 */
+	NI_GPCT_ARM_PAIRED_IMMEDIATE = 0x1,
+	/*
+	 * If the NI_GPCT_HW_ARM bit is set, we will pass the least significant
+	 * bits (3 bits for 660x or 5 bits for m-series) through to the
+	 * hardware. To select a hardware trigger, pass the appropriate select
+	 * bit, e.g.,
+	 * NI_GPCT_HW_ARM | NI_GPCT_AI_START1_GATE_SELECT or
+	 * NI_GPCT_HW_ARM | NI_GPCT_PFI_GATE_SELECT(pfi_number)
+	 */
+	NI_GPCT_HW_ARM = 0x1000,
+	NI_GPCT_ARM_UNKNOWN = NI_GPCT_HW_ARM,	/* for backward compatibility */
+};
+
+/* digital filtering options for ni 660x for use with INSN_CONFIG_FILTER. */
+enum ni_gpct_filter_select {
+	NI_GPCT_FILTER_OFF = 0x0,
+	NI_GPCT_FILTER_TIMEBASE_3_SYNC = 0x1,
+	NI_GPCT_FILTER_100x_TIMEBASE_1 = 0x2,
+	NI_GPCT_FILTER_20x_TIMEBASE_1 = 0x3,
+	NI_GPCT_FILTER_10x_TIMEBASE_1 = 0x4,
+	NI_GPCT_FILTER_2x_TIMEBASE_1 = 0x5,
+	NI_GPCT_FILTER_2x_TIMEBASE_3 = 0x6
+};
+
+/*
+ * PFI digital filtering options for ni m-series for use with
+ * INSN_CONFIG_FILTER.
+ */
+enum ni_pfi_filter_select {
+	NI_PFI_FILTER_OFF = 0x0,
+	NI_PFI_FILTER_125ns = 0x1,
+	NI_PFI_FILTER_6425ns = 0x2,
+	NI_PFI_FILTER_2550us = 0x3
+};
+
+/* master clock sources for ni mio boards and INSN_CONFIG_SET_CLOCK_SRC */
+enum ni_mio_clock_source {
+	NI_MIO_INTERNAL_CLOCK = 0,
+	/*
+	 * Doesn't work for m-series, use NI_MIO_PLL_RTSI_CLOCK()
+	 * the NI_MIO_PLL_* sources are m-series only
+	 */
+	NI_MIO_RTSI_CLOCK = 1,
+	NI_MIO_PLL_PXI_STAR_TRIGGER_CLOCK = 2,
+	NI_MIO_PLL_PXI10_CLOCK = 3,
+	NI_MIO_PLL_RTSI0_CLOCK = 4
+};
+
+#define NI_MIO_PLL_RTSI_CLOCK(x)	(NI_MIO_PLL_RTSI0_CLOCK + (x))
+
+/*
+ * Signals which can be routed to an NI RTSI pin with INSN_CONFIG_SET_ROUTING.
+ * The numbers assigned are not arbitrary, they correspond to the bits required
+ * to program the board.
+ */
+enum ni_rtsi_routing {
+	NI_RTSI_OUTPUT_ADR_START1 = 0,
+	NI_RTSI_OUTPUT_ADR_START2 = 1,
+	NI_RTSI_OUTPUT_SCLKG = 2,
+	NI_RTSI_OUTPUT_DACUPDN = 3,
+	NI_RTSI_OUTPUT_DA_START1 = 4,
+	NI_RTSI_OUTPUT_G_SRC0 = 5,
+	NI_RTSI_OUTPUT_G_GATE0 = 6,
+	NI_RTSI_OUTPUT_RGOUT0 = 7,
+	NI_RTSI_OUTPUT_RTSI_BRD_0 = 8,
+	/* Pre-m-series always have RTSI clock on line 7 */
+	NI_RTSI_OUTPUT_RTSI_OSC = 12
+};
+
+#define NI_RTSI_OUTPUT_RTSI_BRD(x)	(NI_RTSI_OUTPUT_RTSI_BRD_0 + (x))
+
+/*
+ * Signals which can be routed to an NI PFI pin on an m-series board with
+ * INSN_CONFIG_SET_ROUTING.  These numbers are also returned by
+ * INSN_CONFIG_GET_ROUTING on pre-m-series boards, even though their routing
+ * cannot be changed.  The numbers assigned are not arbitrary, they correspond
+ * to the bits required to program the board.
+ */
+enum ni_pfi_routing {
+	NI_PFI_OUTPUT_PFI_DEFAULT = 0,
+	NI_PFI_OUTPUT_AI_START1 = 1,
+	NI_PFI_OUTPUT_AI_START2 = 2,
+	NI_PFI_OUTPUT_AI_CONVERT = 3,
+	NI_PFI_OUTPUT_G_SRC1 = 4,
+	NI_PFI_OUTPUT_G_GATE1 = 5,
+	NI_PFI_OUTPUT_AO_UPDATE_N = 6,
+	NI_PFI_OUTPUT_AO_START1 = 7,
+	NI_PFI_OUTPUT_AI_START_PULSE = 8,
+	NI_PFI_OUTPUT_G_SRC0 = 9,
+	NI_PFI_OUTPUT_G_GATE0 = 10,
+	NI_PFI_OUTPUT_EXT_STROBE = 11,
+	NI_PFI_OUTPUT_AI_EXT_MUX_CLK = 12,
+	NI_PFI_OUTPUT_GOUT0 = 13,
+	NI_PFI_OUTPUT_GOUT1 = 14,
+	NI_PFI_OUTPUT_FREQ_OUT = 15,
+	NI_PFI_OUTPUT_PFI_DO = 16,
+	NI_PFI_OUTPUT_I_ATRIG = 17,
+	NI_PFI_OUTPUT_RTSI0 = 18,
+	NI_PFI_OUTPUT_PXI_STAR_TRIGGER_IN = 26,
+	NI_PFI_OUTPUT_SCXI_TRIG1 = 27,
+	NI_PFI_OUTPUT_DIO_CHANGE_DETECT_RTSI = 28,
+	NI_PFI_OUTPUT_CDI_SAMPLE = 29,
+	NI_PFI_OUTPUT_CDO_UPDATE = 30
+};
+
+#define NI_PFI_OUTPUT_RTSI(x)		(NI_PFI_OUTPUT_RTSI0 + (x))
+
+/*
+ * Signals which can be routed to output on a NI PFI pin on a 660x board
+ * with INSN_CONFIG_SET_ROUTING.  The numbers assigned are
+ * not arbitrary, they correspond to the bits required
+ * to program the board.  Lines 0 to 7 can only be set to
+ * NI_660X_PFI_OUTPUT_DIO.  Lines 32 to 39 can only be set to
+ * NI_660X_PFI_OUTPUT_COUNTER.
+ */
+enum ni_660x_pfi_routing {
+	NI_660X_PFI_OUTPUT_COUNTER = 1,	/* counter */
+	NI_660X_PFI_OUTPUT_DIO = 2,	/* static digital output */
+};
+
+/*
+ * NI External Trigger lines.  These values are not arbitrary, but are related
+ * to the bits required to program the board (offset by 1 for historical
+ * reasons).
+ */
+#define NI_EXT_PFI(x)			(NI_USUAL_PFI_SELECT(x) - 1)
+#define NI_EXT_RTSI(x)			(NI_USUAL_RTSI_SELECT(x) - 1)
+
+/*
+ * Clock sources for CDIO subdevice on NI m-series boards.  Used as the
+ * scan_begin_arg for a comedi_command. These sources may also be bitwise-or'd
+ * with CR_INVERT to change polarity.
+ */
+enum ni_m_series_cdio_scan_begin_src {
+	NI_CDIO_SCAN_BEGIN_SRC_GROUND = 0,
+	NI_CDIO_SCAN_BEGIN_SRC_AI_START = 18,
+	NI_CDIO_SCAN_BEGIN_SRC_AI_CONVERT = 19,
+	NI_CDIO_SCAN_BEGIN_SRC_PXI_STAR_TRIGGER = 20,
+	NI_CDIO_SCAN_BEGIN_SRC_G0_OUT = 28,
+	NI_CDIO_SCAN_BEGIN_SRC_G1_OUT = 29,
+	NI_CDIO_SCAN_BEGIN_SRC_ANALOG_TRIGGER = 30,
+	NI_CDIO_SCAN_BEGIN_SRC_AO_UPDATE = 31,
+	NI_CDIO_SCAN_BEGIN_SRC_FREQ_OUT = 32,
+	NI_CDIO_SCAN_BEGIN_SRC_DIO_CHANGE_DETECT_IRQ = 33
+};
+
+#define NI_CDIO_SCAN_BEGIN_SRC_PFI(x)	NI_USUAL_PFI_SELECT(x)
+#define NI_CDIO_SCAN_BEGIN_SRC_RTSI(x)	NI_USUAL_RTSI_SELECT(x)
+
+/*
+ * scan_begin_src for scan_begin_arg==TRIG_EXT with analog output command on NI
+ * boards.  These scan begin sources can also be bitwise-or'd with CR_INVERT to
+ * change polarity.
+ */
+#define NI_AO_SCAN_BEGIN_SRC_PFI(x)	NI_USUAL_PFI_SELECT(x)
+#define NI_AO_SCAN_BEGIN_SRC_RTSI(x)	NI_USUAL_RTSI_SELECT(x)
+
+/*
+ * Bits for setting a clock source with
+ * INSN_CONFIG_SET_CLOCK_SRC when using NI frequency output subdevice.
+ */
+enum ni_freq_out_clock_source_bits {
+	NI_FREQ_OUT_TIMEBASE_1_DIV_2_CLOCK_SRC,	/* 10 MHz */
+	NI_FREQ_OUT_TIMEBASE_2_CLOCK_SRC	/* 100 KHz */
+};
+
+/*
+ * Values for setting a clock source with INSN_CONFIG_SET_CLOCK_SRC for
+ * 8254 counter subdevices on Amplicon DIO boards (amplc_dio200 driver).
+ */
+enum amplc_dio_clock_source {
+	/*
+	 * Per channel external clock
+	 * input/output pin (pin is only an
+	 * input when clock source set to this value,
+	 * otherwise it is an output)
+	 */
+	AMPLC_DIO_CLK_CLKN,
+	AMPLC_DIO_CLK_10MHZ,	/* 10 MHz internal clock */
+	AMPLC_DIO_CLK_1MHZ,	/* 1 MHz internal clock */
+	AMPLC_DIO_CLK_100KHZ,	/* 100 kHz internal clock */
+	AMPLC_DIO_CLK_10KHZ,	/* 10 kHz internal clock */
+	AMPLC_DIO_CLK_1KHZ,	/* 1 kHz internal clock */
+	/*
+	 * Output of preceding counter channel
+	 * (for channel 0, preceding counter
+	 * channel is channel 2 on preceding
+	 * counter subdevice, for first counter
+	 * subdevice, preceding counter
+	 * subdevice is the last counter
+	 * subdevice)
+	 */
+	AMPLC_DIO_CLK_OUTNM1,
+	AMPLC_DIO_CLK_EXT,	/* per chip external input pin */
+	/* the following are "enhanced" clock sources for PCIe models */
+	AMPLC_DIO_CLK_VCC,	/* clock input HIGH */
+	AMPLC_DIO_CLK_GND,	/* clock input LOW */
+	AMPLC_DIO_CLK_PAT_PRESENT, /* "pattern present" signal */
+	AMPLC_DIO_CLK_20MHZ	/* 20 MHz internal clock */
+};
+
+/*
+ * Values for setting a clock source with INSN_CONFIG_SET_CLOCK_SRC for
+ * timer subdevice on some Amplicon DIO PCIe boards (amplc_dio200 driver).
+ */
+enum amplc_dio_ts_clock_src {
+	AMPLC_DIO_TS_CLK_1GHZ,	/* 1 ns period with 20 ns granularity */
+	AMPLC_DIO_TS_CLK_1MHZ,	/* 1 us period */
+	AMPLC_DIO_TS_CLK_1KHZ	/* 1 ms period */
+};
+
+/*
+ * Values for setting a gate source with INSN_CONFIG_SET_GATE_SRC for
+ * 8254 counter subdevices on Amplicon DIO boards (amplc_dio200 driver).
+ */
+enum amplc_dio_gate_source {
+	AMPLC_DIO_GAT_VCC,	/* internal high logic level */
+	AMPLC_DIO_GAT_GND,	/* internal low logic level */
+	AMPLC_DIO_GAT_GATN,	/* per channel external gate input */
+	/*
+	 * negated output of counter channel minus 2
+	 * (for channels 0 or 1, channel minus 2 is channel 1 or 2 on
+	 * the preceding counter subdevice, for the first counter subdevice
+	 * the preceding counter subdevice is the last counter subdevice)
+	 */
+	AMPLC_DIO_GAT_NOUTNM2,
+	AMPLC_DIO_GAT_RESERVED4,
+	AMPLC_DIO_GAT_RESERVED5,
+	AMPLC_DIO_GAT_RESERVED6,
+	AMPLC_DIO_GAT_RESERVED7,
+	/* the following are "enhanced" gate sources for PCIe models */
+	AMPLC_DIO_GAT_NGATN = 6, /* negated per channel gate input */
+	/* non-negated output of counter channel minus 2 */
+	AMPLC_DIO_GAT_OUTNM2,
+	AMPLC_DIO_GAT_PAT_PRESENT, /* "pattern present" signal */
+	AMPLC_DIO_GAT_PAT_OCCURRED, /* "pattern occurred" latched */
+	AMPLC_DIO_GAT_PAT_GONE,	/* "pattern gone away" latched */
+	AMPLC_DIO_GAT_NPAT_PRESENT, /* negated "pattern present" */
+	AMPLC_DIO_GAT_NPAT_OCCURRED, /* negated "pattern occurred" */
+	AMPLC_DIO_GAT_NPAT_GONE	/* negated "pattern gone away" */
+};
+
+/*
+ * Values for setting a clock source with INSN_CONFIG_SET_CLOCK_SRC for
+ * the counter subdevice on the Kolter Electronic PCI-Counter board
+ * (ke_counter driver).
+ */
+enum ke_counter_clock_source {
+	KE_CLK_20MHZ,	/* internal 20MHz (default) */
+	KE_CLK_4MHZ,	/* internal 4MHz (option) */
+	KE_CLK_EXT	/* external clock on pin 21 of D-Sub */
+};
+
+#endif /* _COMEDI_H */
-- 
cgit v1.2.3


From 631e272b12075b60f7c7fc4f84f937d78a699844 Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Wed, 17 Nov 2021 12:06:01 +0000
Subject: comedi: Move and rename "8255.h" to <linux/comedi/comedi_8255.h>

Some of the header files in "drivers/comedi/drivers/" are common enough
to be useful to out-of-tree comedi driver modules.  Using them for
out-of-tree module builds is hampered by the headers being outside the
"include/" directory so it is desirable to move them.

There are about a couple of dozen Comedi device drivers that use the
"comedi_8255" module to add digital I/O subdevices based on the
venerable 8255 Programmable Peripheral Interface chip.  The macros and
declarations to use that module are in the "8255.h" header file in the
comedi "drivers" directory.  Move it into "include/linux/comedi/" and
rename it to "comedi_8255.h" for naming consistency reasons.

Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Link: https://lore.kernel.org/r/20211117120604.117740-4-abbotti@mev.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/comedi/comedi_8255.h | 42 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 include/linux/comedi/comedi_8255.h

(limited to 'include')

diff --git a/include/linux/comedi/comedi_8255.h b/include/linux/comedi/comedi_8255.h
new file mode 100644
index 000000000000..b2a5bc6b3a49
--- /dev/null
+++ b/include/linux/comedi/comedi_8255.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * comedi_8255.h
+ * Generic 8255 digital I/O subdevice support
+ *
+ * COMEDI - Linux Control and Measurement Device Interface
+ * Copyright (C) 1998 David A. Schleef <ds@schleef.org>
+ */
+
+#ifndef _COMEDI_8255_H
+#define _COMEDI_8255_H
+
+#define I8255_SIZE		0x04
+
+#define I8255_DATA_A_REG	0x00
+#define I8255_DATA_B_REG	0x01
+#define I8255_DATA_C_REG	0x02
+#define I8255_CTRL_REG		0x03
+#define I8255_CTRL_C_LO_IO	BIT(0)
+#define I8255_CTRL_B_IO		BIT(1)
+#define I8255_CTRL_B_MODE	BIT(2)
+#define I8255_CTRL_C_HI_IO	BIT(3)
+#define I8255_CTRL_A_IO		BIT(4)
+#define I8255_CTRL_A_MODE(x)	((x) << 5)
+#define I8255_CTRL_CW		BIT(7)
+
+struct comedi_device;
+struct comedi_subdevice;
+
+int subdev_8255_init(struct comedi_device *dev, struct comedi_subdevice *s,
+		     int (*io)(struct comedi_device *dev, int dir, int port,
+			       int data, unsigned long regbase),
+		     unsigned long regbase);
+
+int subdev_8255_mm_init(struct comedi_device *dev, struct comedi_subdevice *s,
+			int (*io)(struct comedi_device *dev, int dir, int port,
+				  int data, unsigned long regbase),
+			unsigned long regbase);
+
+unsigned long subdev_8255_regbase(struct comedi_subdevice *s);
+
+#endif
-- 
cgit v1.2.3


From 44fb7affcfa4e968e9c2ede023ef0e15f06d8209 Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Wed, 17 Nov 2021 12:06:02 +0000
Subject: comedi: Move "comedi_8254.h" to <linux/comedi/comedi_8254.h>

Some of the header files in "drivers/comedi/drivers/" are common enough
to be useful to out-of-tree comedi driver modules.  Using them for
out-of-tree module builds is hampered by the headers being outside the
"include/" directory so it is desirable to move them.

There are about a couple of dozen or so Comedi device drivers that use
the "comedi_8254" module to add timers based on the venerable 8254
Programmable Interval Timer chip.  The macros and declarations to use
that module are in the "comedi_8254.h" header file in the comedi
"drivers" directory.  Move it into "include/linux/comedi/".

Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Link: https://lore.kernel.org/r/20211117120604.117740-5-abbotti@mev.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/comedi/comedi_8254.h | 134 +++++++++++++++++++++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 include/linux/comedi/comedi_8254.h

(limited to 'include')

diff --git a/include/linux/comedi/comedi_8254.h b/include/linux/comedi/comedi_8254.h
new file mode 100644
index 000000000000..d8264417e53c
--- /dev/null
+++ b/include/linux/comedi/comedi_8254.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * comedi_8254.h
+ * Generic 8254 timer/counter support
+ * Copyright (C) 2014 H Hartley Sweeten <hsweeten@visionengravers.com>
+ *
+ * COMEDI - Linux Control and Measurement Device Interface
+ * Copyright (C) 2000 David A. Schleef <ds@schleef.org>
+ */
+
+#ifndef _COMEDI_8254_H
+#define _COMEDI_8254_H
+
+#include <linux/types.h>
+
+struct comedi_device;
+struct comedi_insn;
+struct comedi_subdevice;
+
+/*
+ * Common oscillator base values in nanoseconds
+ */
+#define I8254_OSC_BASE_10MHZ	100
+#define I8254_OSC_BASE_5MHZ	200
+#define I8254_OSC_BASE_4MHZ	250
+#define I8254_OSC_BASE_2MHZ	500
+#define I8254_OSC_BASE_1MHZ	1000
+#define I8254_OSC_BASE_100KHZ	10000
+#define I8254_OSC_BASE_10KHZ	100000
+#define I8254_OSC_BASE_1KHZ	1000000
+
+/*
+ * I/O access size used to read/write registers
+ */
+#define I8254_IO8		1
+#define I8254_IO16		2
+#define I8254_IO32		4
+
+/*
+ * Register map for generic 8254 timer (I8254_IO8 with 0 regshift)
+ */
+#define I8254_COUNTER0_REG		0x00
+#define I8254_COUNTER1_REG		0x01
+#define I8254_COUNTER2_REG		0x02
+#define I8254_CTRL_REG			0x03
+#define I8254_CTRL_SEL_CTR(x)		((x) << 6)
+#define I8254_CTRL_READBACK(x)		(I8254_CTRL_SEL_CTR(3) | BIT(x))
+#define I8254_CTRL_READBACK_COUNT	I8254_CTRL_READBACK(4)
+#define I8254_CTRL_READBACK_STATUS	I8254_CTRL_READBACK(5)
+#define I8254_CTRL_READBACK_SEL_CTR(x)	(2 << (x))
+#define I8254_CTRL_RW(x)		(((x) & 0x3) << 4)
+#define I8254_CTRL_LATCH		I8254_CTRL_RW(0)
+#define I8254_CTRL_LSB_ONLY		I8254_CTRL_RW(1)
+#define I8254_CTRL_MSB_ONLY		I8254_CTRL_RW(2)
+#define I8254_CTRL_LSB_MSB		I8254_CTRL_RW(3)
+
+/* counter maps zero to 0x10000 */
+#define I8254_MAX_COUNT			0x10000
+
+/**
+ * struct comedi_8254 - private data used by this module
+ * @iobase:		PIO base address of the registers (in/out)
+ * @mmio:		MMIO base address of the registers (read/write)
+ * @iosize:		I/O size used to access the registers (b/w/l)
+ * @regshift:		register gap shift
+ * @osc_base:		cascaded oscillator speed in ns
+ * @divisor:		divisor for single counter
+ * @divisor1:		divisor loaded into first cascaded counter
+ * @divisor2:		divisor loaded into second cascaded counter
+ * #next_div:		next divisor for single counter
+ * @next_div1:		next divisor to use for first cascaded counter
+ * @next_div2:		next divisor to use for second cascaded counter
+ * @clock_src;		current clock source for each counter (driver specific)
+ * @gate_src;		current gate source  for each counter (driver specific)
+ * @busy:		flags used to indicate that a counter is "busy"
+ * @insn_config:	driver specific (*insn_config) callback
+ */
+struct comedi_8254 {
+	unsigned long iobase;
+	void __iomem *mmio;
+	unsigned int iosize;
+	unsigned int regshift;
+	unsigned int osc_base;
+	unsigned int divisor;
+	unsigned int divisor1;
+	unsigned int divisor2;
+	unsigned int next_div;
+	unsigned int next_div1;
+	unsigned int next_div2;
+	unsigned int clock_src[3];
+	unsigned int gate_src[3];
+	bool busy[3];
+
+	int (*insn_config)(struct comedi_device *dev,
+			   struct comedi_subdevice *s,
+			   struct comedi_insn *insn, unsigned int *data);
+};
+
+unsigned int comedi_8254_status(struct comedi_8254 *i8254,
+				unsigned int counter);
+unsigned int comedi_8254_read(struct comedi_8254 *i8254, unsigned int counter);
+void comedi_8254_write(struct comedi_8254 *i8254,
+		       unsigned int counter, unsigned int val);
+
+int comedi_8254_set_mode(struct comedi_8254 *i8254,
+			 unsigned int counter, unsigned int mode);
+int comedi_8254_load(struct comedi_8254 *i8254,
+		     unsigned int counter, unsigned int val, unsigned int mode);
+
+void comedi_8254_pacer_enable(struct comedi_8254 *i8254,
+			      unsigned int counter1, unsigned int counter2,
+			      bool enable);
+void comedi_8254_update_divisors(struct comedi_8254 *i8254);
+void comedi_8254_cascade_ns_to_timer(struct comedi_8254 *i8254,
+				     unsigned int *nanosec, unsigned int flags);
+void comedi_8254_ns_to_timer(struct comedi_8254 *i8254,
+			     unsigned int *nanosec, unsigned int flags);
+
+void comedi_8254_set_busy(struct comedi_8254 *i8254,
+			  unsigned int counter, bool busy);
+
+void comedi_8254_subdevice_init(struct comedi_subdevice *s,
+				struct comedi_8254 *i8254);
+
+struct comedi_8254 *comedi_8254_init(unsigned long iobase,
+				     unsigned int osc_base,
+				     unsigned int iosize,
+				     unsigned int regshift);
+struct comedi_8254 *comedi_8254_mm_init(void __iomem *mmio,
+					unsigned int osc_base,
+					unsigned int iosize,
+					unsigned int regshift);
+
+#endif	/* _COMEDI_8254_H */
-- 
cgit v1.2.3


From fe7a4f5b9548456246ffda143bab59922acda9fd Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Wed, 17 Nov 2021 12:06:03 +0000
Subject: comedi: Move "comedi_isadma.h" to <linux/comedi/comedi_isadma.h>

Some of the header files in "drivers/comedi/drivers/" are common enough
to be useful to out-of-tree comedi driver modules.  Using them for
out-of-tree module builds is hampered by the headers being outside the
"include/" directory so it is desirable to move them.

There are about a half a dozen or so Comedi device drivers that use the
"comedi_isadma" module to add ISA DMA support.  The macros and
declarations to use that module are in the "comedi_isadma.h" header file
in the comedi "drivers" directory.  Move it into
"include/linux/comedi/".

Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Link: https://lore.kernel.org/r/20211117120604.117740-6-abbotti@mev.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/comedi/comedi_isadma.h | 114 +++++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 include/linux/comedi/comedi_isadma.h

(limited to 'include')

diff --git a/include/linux/comedi/comedi_isadma.h b/include/linux/comedi/comedi_isadma.h
new file mode 100644
index 000000000000..9d2b12db7e6e
--- /dev/null
+++ b/include/linux/comedi/comedi_isadma.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * COMEDI ISA DMA support functions
+ * Copyright (c) 2014 H Hartley Sweeten <hsweeten@visionengravers.com>
+ */
+
+#ifndef _COMEDI_ISADMA_H
+#define _COMEDI_ISADMA_H
+
+#include <linux/types.h>
+
+struct comedi_device;
+struct device;
+
+/*
+ * These are used to avoid issues when <asm/dma.h> and the DMA_MODE_
+ * defines are not available.
+ */
+#define COMEDI_ISADMA_READ	0
+#define COMEDI_ISADMA_WRITE	1
+
+/**
+ * struct comedi_isadma_desc - cookie for ISA DMA
+ * @virt_addr:	virtual address of buffer
+ * @hw_addr:	hardware (bus) address of buffer
+ * @chan:	DMA channel
+ * @maxsize:	allocated size of buffer (in bytes)
+ * @size:	transfer size (in bytes)
+ * @mode:	DMA_MODE_READ or DMA_MODE_WRITE
+ */
+struct comedi_isadma_desc {
+	void *virt_addr;
+	dma_addr_t hw_addr;
+	unsigned int chan;
+	unsigned int maxsize;
+	unsigned int size;
+	char mode;
+};
+
+/**
+ * struct comedi_isadma - ISA DMA data
+ * @dev:	device to allocate non-coherent memory for
+ * @desc:	cookie for each DMA buffer
+ * @n_desc:	the number of cookies
+ * @cur_dma:	the current cookie in use
+ * @chan:	the first DMA channel requested
+ * @chan2:	the second DMA channel requested
+ */
+struct comedi_isadma {
+	struct device *dev;
+	struct comedi_isadma_desc *desc;
+	int n_desc;
+	int cur_dma;
+	unsigned int chan;
+	unsigned int chan2;
+};
+
+#if IS_ENABLED(CONFIG_ISA_DMA_API)
+
+void comedi_isadma_program(struct comedi_isadma_desc *desc);
+unsigned int comedi_isadma_disable(unsigned int dma_chan);
+unsigned int comedi_isadma_disable_on_sample(unsigned int dma_chan,
+					     unsigned int size);
+unsigned int comedi_isadma_poll(struct comedi_isadma *dma);
+void comedi_isadma_set_mode(struct comedi_isadma_desc *desc, char dma_dir);
+
+struct comedi_isadma *comedi_isadma_alloc(struct comedi_device *dev,
+					  int n_desc, unsigned int dma_chan1,
+					  unsigned int dma_chan2,
+					  unsigned int maxsize, char dma_dir);
+void comedi_isadma_free(struct comedi_isadma *dma);
+
+#else	/* !IS_ENABLED(CONFIG_ISA_DMA_API) */
+
+static inline void comedi_isadma_program(struct comedi_isadma_desc *desc)
+{
+}
+
+static inline unsigned int comedi_isadma_disable(unsigned int dma_chan)
+{
+	return 0;
+}
+
+static inline unsigned int
+comedi_isadma_disable_on_sample(unsigned int dma_chan, unsigned int size)
+{
+	return 0;
+}
+
+static inline unsigned int comedi_isadma_poll(struct comedi_isadma *dma)
+{
+	return 0;
+}
+
+static inline void comedi_isadma_set_mode(struct comedi_isadma_desc *desc,
+					  char dma_dir)
+{
+}
+
+static inline struct comedi_isadma *
+comedi_isadma_alloc(struct comedi_device *dev, int n_desc,
+		    unsigned int dma_chan1, unsigned int dma_chan2,
+		    unsigned int maxsize, char dma_dir)
+{
+	return NULL;
+}
+
+static inline void comedi_isadma_free(struct comedi_isadma *dma)
+{
+}
+
+#endif	/* !IS_ENABLED(CONFIG_ISA_DMA_API) */
+
+#endif	/* #ifndef _COMEDI_ISADMA_H */
-- 
cgit v1.2.3


From 2cca3465147d650be3de04927a99784b30251ade Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Fri, 12 Nov 2021 08:28:09 +0200
Subject: mei: bus: add client dma interface

Expose the client dma mapping via mei client bus interface.
The client dma has to be mapped before the device is enabled,
therefore we need to create device linking already during mapping
and we need to unmap after the client is disable hence we need to
postpone the unlink and flush till unmapping or when
destroying the device.

Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Co-developed-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20210420172755.12178-1-emmanuel.grumbach@intel.com
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20211112062814.7502-1-emmanuel.grumbach@intel.com
---
 include/linux/mei_cl_bus.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h
index c6786c12b207..df1fab44ea5c 100644
--- a/include/linux/mei_cl_bus.h
+++ b/include/linux/mei_cl_bus.h
@@ -117,4 +117,7 @@ int mei_cldev_enable(struct mei_cl_device *cldev);
 int mei_cldev_disable(struct mei_cl_device *cldev);
 bool mei_cldev_enabled(const struct mei_cl_device *cldev);
 
+void *mei_cldev_dma_map(struct mei_cl_device *cldev, u8 buffer_id, size_t size);
+int mei_cldev_dma_unmap(struct mei_cl_device *cldev);
+
 #endif /* _LINUX_MEI_CL_BUS_H */
-- 
cgit v1.2.3


From 95706be13b9f755d93b5b82bdc782af439f1ec22 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 26 Nov 2021 19:28:42 +0200
Subject: net: mscc: ocelot: create a function that replaces an existing VCAP
 filter

VCAP (Versatile Content Aware Processor) is the TCAM-based engine behind
tc flower offload on ocelot, among other things. The ingress port mask
on which VCAP rules match is present as a bit field in the actual key of
the rule. This means that it is possible for a rule to be shared among
multiple source ports. When the rule is added one by one on each desired
port, that the ingress port mask of the key must be edited and rewritten
to hardware.

But the API in ocelot_vcap.c does not allow for this. For one thing,
ocelot_vcap_filter_add() and ocelot_vcap_filter_del() are not symmetric,
because ocelot_vcap_filter_add() works with a preallocated and
prepopulated filter and programs it to hardware, and
ocelot_vcap_filter_del() does both the job of removing the specified
filter from hardware, as well as kfreeing it. That is to say, the only
option of editing a filter in place, which is to delete it, modify the
structure and add it back, does not work because it results in
use-after-free.

This patch introduces ocelot_vcap_filter_replace, which trivially
reprograms a VCAP entry to hardware, at the exact same index at which it
existed before, without modifying any list or allocating any memory.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/soc/mscc/ocelot_vcap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h
index eeb1142aa1b1..4d1dfa1136b2 100644
--- a/include/soc/mscc/ocelot_vcap.h
+++ b/include/soc/mscc/ocelot_vcap.h
@@ -703,6 +703,8 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot,
 			   struct netlink_ext_ack *extack);
 int ocelot_vcap_filter_del(struct ocelot *ocelot,
 			   struct ocelot_vcap_filter *rule);
+int ocelot_vcap_filter_replace(struct ocelot *ocelot,
+			       struct ocelot_vcap_filter *filter);
 struct ocelot_vcap_filter *
 ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block,
 				    unsigned long cookie, bool tc_offload);
-- 
cgit v1.2.3


From ec15baec3272bbec576f2ce7ce47765a8e9b7b1c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 26 Nov 2021 19:28:43 +0200
Subject: net: ptp: add a definition for the UDP port for IEEE 1588 general
 messages

As opposed to event messages (Sync, PdelayReq etc) which require
timestamping, general messages (Announce, FollowUp etc) do not.
In PTP they are part of different streams of data.

IEEE 1588-2008 Annex D.2 "UDP port numbers" states that the UDP
destination port assigned by IANA is 319 for event messages, and 320 for
general messages. Yet the kernel seems to be missing the definition for
general messages. This patch adds it.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ptp_classify.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index ae04968a3a47..9afd34a2d36c 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -37,6 +37,7 @@
 #define PTP_MSGTYPE_PDELAY_RESP 0x3
 
 #define PTP_EV_PORT 319
+#define PTP_GEN_PORT 320
 #define PTP_GEN_BIT 0x08 /* indicates general message, if set in message type */
 
 #define OFF_PTP_SOURCE_UUID	22 /* PTPv1 only */
-- 
cgit v1.2.3


From 4e0d84634445ed550498d613a49ea8f6cfa5e66c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 26 Nov 2021 10:58:40 +0100
Subject: futex: Fix sparc32/m68k/nds32 build regression

The recent futex cleanup series, botched up a rename of some function
names, breaking sparc32, m68k and nds32:

include/asm-generic/futex.h:17:2: error: implicit declaration of function 'futex_atomic_cmpxchg_inatomic_local_generic'; did you mean 'futex_atomic_cmpxchg_inatomic_local'? [-Werror=implicit-function-declaration]

Fix the macros to point to the correct functions.

Fixes: 3f2bedabb62c ("futex: Ensure futex_atomic_cmpxchg_inatomic() is present")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20211126095852.455492-1-arnd@kernel.org
---
 include/asm-generic/futex.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index 30e7fa63b5df..66d6843bfd02 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -14,9 +14,9 @@
  *
  */
 #define futex_atomic_cmpxchg_inatomic(uval, uaddr, oldval, newval) \
-	futex_atomic_cmpxchg_inatomic_local_generic(uval, uaddr, oldval, newval)
+	futex_atomic_cmpxchg_inatomic_local(uval, uaddr, oldval, newval)
 #define arch_futex_atomic_op_inuser(op, oparg, oval, uaddr) \
-	arch_futex_atomic_op_inuser_local_generic(op, oparg, oval, uaddr)
+	futex_atomic_op_inuser_local(op, oparg, oval, uaddr)
 #endif /* CONFIG_SMP */
 #endif
 
-- 
cgit v1.2.3


From e6b4b873896f0e9298f70d25726f4bb1e1b265ba Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Wed, 24 Nov 2021 11:14:29 +0900
Subject: af_unix: Save hash in sk_hash.

To replace unix_table_lock with per-hash locks in the next patch, we need
to save a hash in each socket because /proc/net/unix or BPF prog iterate
sockets while holding a hash table lock and release it later in a different
function.

Currently, we store a real/pseudo hash in struct unix_address.  However, we
do not allocate it to unbound sockets, nor should we do just for that.  For
this purpose, we can use sk_hash.  Then, we no longer use the hash field in
struct unix_address and can remove it.

Also, this patch does
  - rename unix_insert_socket() to unix_insert_unbound_socket()
  - remove the redundant list argument from __unix_insert_socket() and
     unix_insert_unbound_socket()
  - use 'unsigned int' instead of 'unsigned' in __unix_set_addr_hash()
  - remove 'inline' from unix_remove_socket() and
     unix_insert_unbound_socket().

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/af_unix.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 7d142e8a0550..89049cc6c066 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -26,7 +26,6 @@ extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 struct unix_address {
 	refcount_t	refcnt;
 	int		len;
-	unsigned int	hash;
 	struct sockaddr_un name[];
 };
 
-- 
cgit v1.2.3


From afd20b9290e184c203fe22f2d6b80dc7127ba724 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Wed, 24 Nov 2021 11:14:30 +0900
Subject: af_unix: Replace the big lock with small locks.

The hash table of AF_UNIX sockets is protected by the single lock.  This
patch replaces it with per-hash locks.

The effect is noticeable when we handle multiple sockets simultaneously.
Here is a test result on an EC2 c5.24xlarge instance.  It shows latency
(under 10us only) in unix_insert_unbound_socket() while 64 CPUs creating
1024 sockets for each in parallel.

  Without this patch:

     nsec          : count     distribution
        0          : 179      |                                        |
        500        : 3021     |*********                               |
        1000       : 6271     |*******************                     |
        1500       : 6318     |*******************                     |
        2000       : 5828     |*****************                       |
        2500       : 5124     |***************                         |
        3000       : 4426     |*************                           |
        3500       : 3672     |***********                             |
        4000       : 3138     |*********                               |
        4500       : 2811     |********                                |
        5000       : 2384     |*******                                 |
        5500       : 2023     |******                                  |
        6000       : 1954     |*****                                   |
        6500       : 1737     |*****                                   |
        7000       : 1749     |*****                                   |
        7500       : 1520     |****                                    |
        8000       : 1469     |****                                    |
        8500       : 1394     |****                                    |
        9000       : 1232     |***                                     |
        9500       : 1138     |***                                     |
        10000      : 994      |***                                     |

  With this patch:

     nsec          : count     distribution
        0          : 1634     |****                                    |
        500        : 13170    |****************************************|
        1000       : 13156    |*************************************** |
        1500       : 9010     |***************************             |
        2000       : 6363     |*******************                     |
        2500       : 4443     |*************                           |
        3000       : 3240     |*********                               |
        3500       : 2549     |*******                                 |
        4000       : 1872     |*****                                   |
        4500       : 1504     |****                                    |
        5000       : 1247     |***                                     |
        5500       : 1035     |***                                     |
        6000       : 889      |**                                      |
        6500       : 744      |**                                      |
        7000       : 634      |*                                       |
        7500       : 498      |*                                       |
        8000       : 433      |*                                       |
        8500       : 355      |*                                       |
        9000       : 336      |*                                       |
        9500       : 284      |                                        |
        10000      : 243      |                                        |

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/af_unix.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 89049cc6c066..a7ef624ed726 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -20,7 +20,7 @@ struct sock *unix_peer_get(struct sock *sk);
 #define UNIX_HASH_BITS	8
 
 extern unsigned int unix_tot_inflight;
-extern spinlock_t unix_table_lock;
+extern spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE];
 extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 
 struct unix_address {
-- 
cgit v1.2.3


From a6914afcdf0e3fb853fce0e0c04710be7427b62f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 10 Nov 2021 12:31:28 +0200
Subject: kobject: Replace kernel.h with the necessary inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20211110103128.59888-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kobject.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index efd56f990a46..c740062b4b1a 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -19,10 +19,10 @@
 #include <linux/list.h>
 #include <linux/sysfs.h>
 #include <linux/compiler.h>
+#include <linux/container_of.h>
 #include <linux/spinlock.h>
 #include <linux/kref.h>
 #include <linux/kobject_ns.h>
-#include <linux/kernel.h>
 #include <linux/wait.h>
 #include <linux/atomic.h>
 #include <linux/workqueue.h>
-- 
cgit v1.2.3


From 6a2d2ddf2c345e0149bfbffdddc4768a9ab0a741 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Fri, 12 Nov 2021 14:32:27 +0100
Subject: drm: Move nomodeset kernel parameter to the DRM subsystem

The "nomodeset" kernel cmdline parameter is handled by the vgacon driver
but the exported vgacon_text_force() symbol is only used by DRM drivers.

It makes much more sense for the parameter logic to be in the subsystem
of the drivers that are making use of it.

Let's move the vgacon_text_force() function and related logic to the DRM
subsystem. While doing that, rename it to drm_firmware_drivers_only() and
make it return true if "nomodeset" was used and false otherwise. This is
a better description of the condition that the drivers are testing for.

Suggested-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20211112133230.1595307-4-javierm@redhat.com
---
 include/drm/drm_drv.h   | 5 +++++
 include/linux/console.h | 6 ------
 2 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index a84eb4028e5b..89e26a732175 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -601,5 +601,10 @@ static inline bool drm_drv_uses_atomic_modeset(struct drm_device *dev)
 
 int drm_dev_set_unique(struct drm_device *dev, const char *name);
 
+#ifdef CONFIG_VGA_CONSOLE
+extern bool drm_firmware_drivers_only(void);
+#else
+static inline bool drm_firmware_drivers_only(void) { return false; }
+#endif
 
 #endif
diff --git a/include/linux/console.h b/include/linux/console.h
index a97f277cfdfa..7cd758a4f44e 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -219,12 +219,6 @@ extern atomic_t ignore_console_lock_warning;
 #define VESA_HSYNC_SUSPEND      2
 #define VESA_POWERDOWN          3
 
-#ifdef CONFIG_VGA_CONSOLE
-extern bool vgacon_text_force(void);
-#else
-static inline bool vgacon_text_force(void) { return false; }
-#endif
-
 extern void console_init(void);
 
 /* For deferred console takeover */
-- 
cgit v1.2.3


From e9aeeba26a8de1f553305722d017022ae7e79280 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Fri, 12 Nov 2021 14:32:28 +0100
Subject: drm: Decouple nomodeset from CONFIG_VGA_CONSOLE

This relationship was only for historical reasons and the nomodeset option
should be available even on platforms that don't enable CONFIG_VGA_CONSOLE.

Suggested-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211112133230.1595307-5-javierm@redhat.com
---
 include/drm/drm_drv.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index 89e26a732175..da0c836fe8e1 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -601,10 +601,6 @@ static inline bool drm_drv_uses_atomic_modeset(struct drm_device *dev)
 
 int drm_dev_set_unique(struct drm_device *dev, const char *name);
 
-#ifdef CONFIG_VGA_CONSOLE
 extern bool drm_firmware_drivers_only(void);
-#else
-static inline bool drm_firmware_drivers_only(void) { return false; }
-#endif
 
 #endif
-- 
cgit v1.2.3


From ed14e769f64311769dcf20dde544b82c158d01b1 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Mon, 15 Nov 2021 14:19:12 +0000
Subject: iio: buffer-dma: Remove unused iio_buffer_block struct

This structure was never used anywhere, so it can safely be dropped.

It will later be re-introduced as a different structure in a
different header.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Reviewed-by: Alexandru Ardelean <ardeleanalex@gmail.com>
Link: https://lore.kernel.org/r/20211115141925.60164-3-paul@crapouillou.net
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/buffer-dma.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/iio/buffer-dma.h b/include/linux/iio/buffer-dma.h
index ff15c61bf319..6564bdcdac66 100644
--- a/include/linux/iio/buffer-dma.h
+++ b/include/linux/iio/buffer-dma.h
@@ -17,11 +17,6 @@ struct iio_dma_buffer_queue;
 struct iio_dma_buffer_ops;
 struct device;
 
-struct iio_buffer_block {
-	u32 size;
-	u32 bytes_used;
-};
-
 /**
  * enum iio_block_state - State of a struct iio_dma_buffer_block
  * @IIO_BLOCK_STATE_DEQUEUED: Block is not queued
-- 
cgit v1.2.3


From ffc7c5172a6d1f7ec468066a7172ce65baf1e3e1 Mon Sep 17 00:00:00 2001
From: Antoniu Miclaus <antoniu.miclaus@analog.com>
Date: Fri, 19 Nov 2021 10:56:27 +0200
Subject: iio: expose shared parameter in IIO_ENUM_AVAILABLE

The shared parameter should be configurable based on its usage, and not
constrained to IIO_SHARED_BY_TYPE.

This patch aims to improve the flexibility in using the
IIO_ENUM_AVAILABLE define and avoid redefining custom iio enums that
expose the shared parameter.

An example is the ad5766.c driver where IIO_ENUM_AVAILABLE_SHARED was
defined in order to achieve `shared` parameter customization.

The current state of the IIO_ENUM_AVAILABLE implementation will imply
similar redefinitions each time a driver will require access to the
`shared` parameter. An example would be admv1013 driver which will
require custom device attribute for the frequency translation modes:
Quadrature I/Q mode and Intermediate Frequency mode.

Signed-off-by: Antoniu Miclaus <antoniu.miclaus@analog.com>
Reviewed-by: Alexandru Ardelean <ardeleanalex@gmail.com>
Link: https://lore.kernel.org/r/20211119085627.6348-1-antoniu.miclaus@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/iio.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 324561b7a5e8..07025d6b3de1 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -103,15 +103,16 @@ ssize_t iio_enum_write(struct iio_dev *indio_dev,
 /**
  * IIO_ENUM_AVAILABLE() - Initialize enum available extended channel attribute
  * @_name:	Attribute name ("_available" will be appended to the name)
+ * @_shared:	Whether the attribute is shared between all channels
  * @_e:		Pointer to an iio_enum struct
  *
  * Creates a read only attribute which lists all the available enum items in a
  * space separated list. This should usually be used together with IIO_ENUM()
  */
-#define IIO_ENUM_AVAILABLE(_name, _e) \
+#define IIO_ENUM_AVAILABLE(_name, _shared, _e) \
 { \
 	.name = (_name "_available"), \
-	.shared = IIO_SHARED_BY_TYPE, \
+	.shared = _shared, \
 	.read = iio_enum_available_read, \
 	.private = (uintptr_t)(_e), \
 }
-- 
cgit v1.2.3


From 447a39f4e89d992f82f03521d46746f6a4348578 Mon Sep 17 00:00:00 2001
From: Sankeerth Billakanti <quic_sbillaka@quicinc.com>
Date: Tue, 2 Nov 2021 13:18:43 +0530
Subject: drm/dp: Add macro to check max_downspread capability

Add a macro to check for the max_downspread capability in
drm_dp_helper.

Signed-off-by: Sankeerth Billakanti <quic_sbillaka@quicinc.com>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>

changes in v4:
    - Return 1 for DPCD version >= v1.1 (Stephen Boyd)
Link: https://lore.kernel.org/r/1635839325-401-4-git-send-email-quic_sbillaka@quicinc.com

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 include/drm/drm_dp_helper.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index b52df4db3e8f..596635ceb9b2 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -1766,6 +1766,13 @@ drm_dp_tps3_supported(const u8 dpcd[DP_RECEIVER_CAP_SIZE])
 		dpcd[DP_MAX_LANE_COUNT] & DP_TPS3_SUPPORTED;
 }
 
+static inline bool
+drm_dp_max_downspread(const u8 dpcd[DP_RECEIVER_CAP_SIZE])
+{
+	return dpcd[DP_DPCD_REV] >= 0x11 ||
+		dpcd[DP_MAX_DOWNSPREAD] & DP_MAX_DOWNSPREAD_0_5;
+}
+
 static inline bool
 drm_dp_tps4_supported(const u8 dpcd[DP_RECEIVER_CAP_SIZE])
 {
-- 
cgit v1.2.3


From 75c5bd68b699bbcb6d25879644d62de4da14ab92 Mon Sep 17 00:00:00 2001
From: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Date: Fri, 26 Nov 2021 10:48:19 +0100
Subject: ieee80211: change HE nominal packet padding value defines

It's easier to use and understand, and to extend for EHT later,
if we use the values here instead of the shifted values.

Unfortunately, we need to add _POS so that we can use it in
places like iwlwifi/mvm where constants are needed.

While at it, fix the typo ("NOMIMAL") which also helps catch any
conflicts.

Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://lore.kernel.org/r/20211126104817.7c29a05b8eb5.I2ca9faf06e177e3035bec91e2ae53c2f91d41774@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 11d7af260f20..559b6c644938 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2258,11 +2258,12 @@ enum ieee80211_client_reg_power {
 #define IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU		0x08
 #define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB	0x10
 #define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB	0x20
-#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_0US			0x00
-#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_8US			0x40
-#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_16US			0x80
-#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_RESERVED		0xc0
-#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_MASK			0xc0
+#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_0US			0x0
+#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_8US			0x1
+#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_16US			0x2
+#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_RESERVED		0x3
+#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_POS			6
+#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_MASK			0xc0
 
 #define IEEE80211_HE_PHY_CAP10_HE_MU_M1RU_MAX_LTF			0x01
 
-- 
cgit v1.2.3


From 4ba0b2c294fe691921271372f7b59e5cc2ce4b0f Mon Sep 17 00:00:00 2001
From: Russ Weight <russell.h.weight@intel.com>
Date: Thu, 18 Nov 2021 17:55:51 -0800
Subject: fpga: mgr: Use standard dev_release for class driver

The FPGA manager class driver data structure is being treated as a
managed resource instead of using the standard dev_release call-back
function to release the class data structure. This change removes
the managed resource code for the freeing of the class data structure
and combines the create() and register() functions into a single
register() or register_full() function.

The register_full() function accepts an info data structure to provide
flexibility in passing optional parameters. The register() function
supports the current parameter list for users that don't require the
use of optional parameters.

The devm_fpga_mgr_register() function is retained, and the
devm_fpga_mgr_register_full() function is added.

Signed-off-by: Russ Weight <russell.h.weight@intel.com>
Reviewed-by: Xu Yilun <yilun.xu@intel.com>
Acked-by: Xu Yilun <yilun.xu@intel.com>
Signed-off-by: Moritz Fischer <mdf@kernel.org>
---
 include/linux/fpga/fpga-mgr.h | 62 ++++++++++++++++++++++++++++---------------
 1 file changed, 41 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index 474c1f506307..0f9468771bb9 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -105,6 +105,36 @@ struct fpga_image_info {
 #endif
 };
 
+/**
+ * struct fpga_compat_id - id for compatibility check
+ *
+ * @id_h: high 64bit of the compat_id
+ * @id_l: low 64bit of the compat_id
+ */
+struct fpga_compat_id {
+	u64 id_h;
+	u64 id_l;
+};
+
+/**
+ * struct fpga_manager_info - collection of parameters for an FPGA Manager
+ * @name: fpga manager name
+ * @compat_id: FPGA manager id for compatibility check.
+ * @mops: pointer to structure of fpga manager ops
+ * @priv: fpga manager private data
+ *
+ * fpga_manager_info contains parameters for the register_full function.
+ * These are separated into an info structure because they some are optional
+ * others could be added to in the future. The info structure facilitates
+ * maintaining a stable API.
+ */
+struct fpga_manager_info {
+	const char *name;
+	struct fpga_compat_id *compat_id;
+	const struct fpga_manager_ops *mops;
+	void *priv;
+};
+
 /**
  * struct fpga_manager_ops - ops for low level fpga manager drivers
  * @initial_header_size: Maximum number of bytes that should be passed into write_init
@@ -143,17 +173,6 @@ struct fpga_manager_ops {
 #define FPGA_MGR_STATUS_IP_PROTOCOL_ERR		BIT(3)
 #define FPGA_MGR_STATUS_FIFO_OVERFLOW_ERR	BIT(4)
 
-/**
- * struct fpga_compat_id - id for compatibility check
- *
- * @id_h: high 64bit of the compat_id
- * @id_l: low 64bit of the compat_id
- */
-struct fpga_compat_id {
-	u64 id_h;
-	u64 id_l;
-};
-
 /**
  * struct fpga_manager - fpga manager structure
  * @name: name of low level fpga manager
@@ -191,17 +210,18 @@ struct fpga_manager *fpga_mgr_get(struct device *dev);
 
 void fpga_mgr_put(struct fpga_manager *mgr);
 
-struct fpga_manager *fpga_mgr_create(struct device *dev, const char *name,
-				     const struct fpga_manager_ops *mops,
-				     void *priv);
-void fpga_mgr_free(struct fpga_manager *mgr);
-int fpga_mgr_register(struct fpga_manager *mgr);
-void fpga_mgr_unregister(struct fpga_manager *mgr);
+struct fpga_manager *
+fpga_mgr_register_full(struct device *parent, const struct fpga_manager_info *info);
 
-int devm_fpga_mgr_register(struct device *dev, struct fpga_manager *mgr);
+struct fpga_manager *
+fpga_mgr_register(struct device *parent, const char *name,
+		  const struct fpga_manager_ops *mops, void *priv);
+void fpga_mgr_unregister(struct fpga_manager *mgr);
 
-struct fpga_manager *devm_fpga_mgr_create(struct device *dev, const char *name,
-					  const struct fpga_manager_ops *mops,
-					  void *priv);
+struct fpga_manager *
+devm_fpga_mgr_register_full(struct device *parent, const struct fpga_manager_info *info);
+struct fpga_manager *
+devm_fpga_mgr_register(struct device *parent, const char *name,
+		       const struct fpga_manager_ops *mops, void *priv);
 
 #endif /*_LINUX_FPGA_MGR_H */
-- 
cgit v1.2.3


From 0d70af3c2530a70f1b2c197feaa63fbd3548ce34 Mon Sep 17 00:00:00 2001
From: Russ Weight <russell.h.weight@intel.com>
Date: Thu, 18 Nov 2021 17:55:52 -0800
Subject: fpga: bridge: Use standard dev_release for class driver

The FPGA bridge class driver data structure is being treated as a
managed resource instead of using the standard dev_release call-back
function to release the class data structure. This change removes
the managed resource code and combines the create() and register()
functions into a single register() function.

Signed-off-by: Russ Weight <russell.h.weight@intel.com>
Reviewed-by: Xu Yilun <yilun.xu@intel.com>
Acked-by: Xu Yilun <yilun.xu@intel.com>
Signed-off-by: Moritz Fischer <mdf@kernel.org>
---
 include/linux/fpga/fpga-bridge.h | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/fpga/fpga-bridge.h b/include/linux/fpga/fpga-bridge.h
index 6c3c28806ff1..223da48a6d18 100644
--- a/include/linux/fpga/fpga-bridge.h
+++ b/include/linux/fpga/fpga-bridge.h
@@ -22,6 +22,23 @@ struct fpga_bridge_ops {
 	const struct attribute_group **groups;
 };
 
+/**
+ * struct fpga_bridge_info - collection of parameters an FPGA Bridge
+ * @name: fpga bridge name
+ * @br_ops: pointer to structure of fpga bridge ops
+ * @priv: fpga bridge private data
+ *
+ * fpga_bridge_info contains parameters for the register function. These
+ * are separated into an info structure because they some are optional
+ * others could be added to in the future. The info structure facilitates
+ * maintaining a stable API.
+ */
+struct fpga_bridge_info {
+	const char *name;
+	const struct fpga_bridge_ops *br_ops;
+	void *priv;
+};
+
 /**
  * struct fpga_bridge - FPGA bridge structure
  * @name: name of low level FPGA bridge
@@ -62,15 +79,10 @@ int of_fpga_bridge_get_to_list(struct device_node *np,
 			       struct fpga_image_info *info,
 			       struct list_head *bridge_list);
 
-struct fpga_bridge *fpga_bridge_create(struct device *dev, const char *name,
-				       const struct fpga_bridge_ops *br_ops,
-				       void *priv);
-void fpga_bridge_free(struct fpga_bridge *br);
-int fpga_bridge_register(struct fpga_bridge *br);
+struct fpga_bridge *
+fpga_bridge_register(struct device *parent, const char *name,
+		     const struct fpga_bridge_ops *br_ops,
+		     void *priv);
 void fpga_bridge_unregister(struct fpga_bridge *br);
 
-struct fpga_bridge
-*devm_fpga_bridge_create(struct device *dev, const char *name,
-			 const struct fpga_bridge_ops *br_ops, void *priv);
-
 #endif /* _LINUX_FPGA_BRIDGE_H */
-- 
cgit v1.2.3


From 8886a579744fbfa53e69aa453ed10ae3b1f9abac Mon Sep 17 00:00:00 2001
From: Russ Weight <russell.h.weight@intel.com>
Date: Thu, 18 Nov 2021 17:55:53 -0800
Subject: fpga: region: Use standard dev_release for class driver

The FPGA region class driver data structure is being treated as a
managed resource instead of using the standard dev_release call-back
function to release the class data structure. This change removes the
managed resource code and combines the create() and register()
functions into a single register() or register_full() function.

The register_full() function accepts an info data structure to provide
flexibility in passing optional parameters. The register() function
supports the current parameter list for users that don't require the
use of optional parameters.

Signed-off-by: Russ Weight <russell.h.weight@intel.com>
Reviewed-by: Xu Yilun <yilun.xu@intel.com>
Acked-by: Xu Yilun <yilun.xu@intel.com>
Signed-off-by: Moritz Fischer <mdf@kernel.org>
---
 include/linux/fpga/fpga-region.h | 36 +++++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/fpga/fpga-region.h b/include/linux/fpga/fpga-region.h
index 27cb706275db..3b87f232425c 100644
--- a/include/linux/fpga/fpga-region.h
+++ b/include/linux/fpga/fpga-region.h
@@ -7,6 +7,27 @@
 #include <linux/fpga/fpga-mgr.h>
 #include <linux/fpga/fpga-bridge.h>
 
+struct fpga_region;
+
+/**
+ * struct fpga_region_info - collection of parameters an FPGA Region
+ * @mgr: fpga region manager
+ * @compat_id: FPGA region id for compatibility check.
+ * @priv: fpga region private data
+ * @get_bridges: optional function to get bridges to a list
+ *
+ * fpga_region_info contains parameters for the register_full function.
+ * These are separated into an info structure because they some are optional
+ * others could be added to in the future. The info structure facilitates
+ * maintaining a stable API.
+ */
+struct fpga_region_info {
+	struct fpga_manager *mgr;
+	struct fpga_compat_id *compat_id;
+	void *priv;
+	int (*get_bridges)(struct fpga_region *region);
+};
+
 /**
  * struct fpga_region - FPGA Region structure
  * @dev: FPGA Region device
@@ -37,15 +58,12 @@ struct fpga_region *fpga_region_class_find(
 
 int fpga_region_program_fpga(struct fpga_region *region);
 
-struct fpga_region
-*fpga_region_create(struct device *dev, struct fpga_manager *mgr,
-		    int (*get_bridges)(struct fpga_region *));
-void fpga_region_free(struct fpga_region *region);
-int fpga_region_register(struct fpga_region *region);
-void fpga_region_unregister(struct fpga_region *region);
+struct fpga_region *
+fpga_region_register_full(struct device *parent, const struct fpga_region_info *info);
 
-struct fpga_region
-*devm_fpga_region_create(struct device *dev, struct fpga_manager *mgr,
-			int (*get_bridges)(struct fpga_region *));
+struct fpga_region *
+fpga_region_register(struct device *parent, struct fpga_manager *mgr,
+		     int (*get_bridges)(struct fpga_region *));
+void fpga_region_unregister(struct fpga_region *region);
 
 #endif /* _FPGA_REGION_H */
-- 
cgit v1.2.3


From 7e78781df491e4beb475bac22e6c44236a5002d7 Mon Sep 17 00:00:00 2001
From: Gurchetan Singh <gurchetansingh@chromium.org>
Date: Mon, 22 Nov 2021 15:22:09 -0800
Subject: drm/virtgpu api: define a dummy fence signaled event

The current virtgpu implementation of poll(..) drops events
when VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK is enabled (otherwise
it's like a normal DRM driver).

This is because paravirtualized userspaces receives responses in a
buffer of type BLOB_MEM_GUEST, not by read(..).

To be in line with other DRM drivers and avoid specialized behavior,
it is possible to define a dummy event for virtgpu.  Paravirtualized
userspace will now have to call read(..) on the DRM fd to receive the
dummy event.

Fixes: b10790434cf2 ("drm/virtgpu api: create context init feature")
Reported-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Gurchetan Singh <gurchetansingh@chromium.org>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20211122232210.602-2-gurchetansingh@google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 include/uapi/drm/virtgpu_drm.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index a13e20cc66b4..0512fde5e697 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -196,6 +196,13 @@ struct drm_virtgpu_context_init {
 	__u64 ctx_set_params;
 };
 
+/*
+ * Event code that's given when VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK is in
+ * effect.  The event size is sizeof(drm_event), since there is no additional
+ * payload.
+ */
+#define VIRTGPU_EVENT_FENCE_SIGNALED 0x90000000
+
 #define DRM_IOCTL_VIRTGPU_MAP \
 	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map)
 
-- 
cgit v1.2.3


From 306456c21c792ac633e660bc45f0854b612a0e98 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Tue, 16 Nov 2021 14:54:35 +0200
Subject: mfd: bd70528: Drop BD70528 support

The only known BD70528 use-cases are such that the PMIC is controlled
from separate MCU which is not running Linux. I am not aware of
any Linux driver users. Furthermore, it seems there is no demand for
this IC. Let's ease the maintenance burden and drop the driver. We can
always add it back if there is sudden need for it.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/cf7dfd98b3403ad363b2b48b57bdbfd57a6416cb.1637066805.git.matti.vaittinen@fi.rohmeurope.com
---
 include/linux/mfd/rohm-bd70528.h | 389 ---------------------------------------
 include/linux/mfd/rohm-generic.h |   1 -
 2 files changed, 390 deletions(-)
 delete mode 100644 include/linux/mfd/rohm-bd70528.h

(limited to 'include')

diff --git a/include/linux/mfd/rohm-bd70528.h b/include/linux/mfd/rohm-bd70528.h
deleted file mode 100644
index 4a5966475a35..000000000000
--- a/include/linux/mfd/rohm-bd70528.h
+++ /dev/null
@@ -1,389 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* Copyright (C) 2018 ROHM Semiconductors */
-
-#ifndef __LINUX_MFD_BD70528_H__
-#define __LINUX_MFD_BD70528_H__
-
-#include <linux/bits.h>
-#include <linux/device.h>
-#include <linux/mfd/rohm-generic.h>
-#include <linux/mfd/rohm-shared.h>
-#include <linux/regmap.h>
-
-enum {
-	BD70528_BUCK1,
-	BD70528_BUCK2,
-	BD70528_BUCK3,
-	BD70528_LDO1,
-	BD70528_LDO2,
-	BD70528_LDO3,
-	BD70528_LED1,
-	BD70528_LED2,
-};
-
-struct bd70528_data {
-	struct rohm_regmap_dev chip;
-	struct mutex rtc_timer_lock;
-};
-
-#define BD70528_BUCK_VOLTS 0x10
-#define BD70528_LDO_VOLTS 0x20
-
-#define BD70528_REG_BUCK1_EN	0x0F
-#define BD70528_REG_BUCK1_VOLT	0x15
-#define BD70528_REG_BUCK2_EN	0x10
-#define BD70528_REG_BUCK2_VOLT	0x16
-#define BD70528_REG_BUCK3_EN	0x11
-#define BD70528_REG_BUCK3_VOLT	0x17
-#define BD70528_REG_LDO1_EN	0x1b
-#define BD70528_REG_LDO1_VOLT	0x1e
-#define BD70528_REG_LDO2_EN	0x1c
-#define BD70528_REG_LDO2_VOLT	0x1f
-#define BD70528_REG_LDO3_EN	0x1d
-#define BD70528_REG_LDO3_VOLT	0x20
-#define BD70528_REG_LED_CTRL	0x2b
-#define BD70528_REG_LED_VOLT	0x29
-#define BD70528_REG_LED_EN	0x2a
-
-/* main irq registers */
-#define BD70528_REG_INT_MAIN	0x7E
-#define BD70528_REG_INT_MAIN_MASK 0x74
-
-/* 'sub irq' registers */
-#define BD70528_REG_INT_SHDN	0x7F
-#define BD70528_REG_INT_PWR_FLT	0x80
-#define BD70528_REG_INT_VR_FLT	0x81
-#define BD70528_REG_INT_MISC	0x82
-#define BD70528_REG_INT_BAT1	0x83
-#define BD70528_REG_INT_BAT2	0x84
-#define BD70528_REG_INT_RTC	0x85
-#define BD70528_REG_INT_GPIO	0x86
-#define BD70528_REG_INT_OP_FAIL	0x87
-
-#define BD70528_REG_INT_SHDN_MASK	0x75
-#define BD70528_REG_INT_PWR_FLT_MASK	0x76
-#define BD70528_REG_INT_VR_FLT_MASK	0x77
-#define BD70528_REG_INT_MISC_MASK	0x78
-#define BD70528_REG_INT_BAT1_MASK	0x79
-#define BD70528_REG_INT_BAT2_MASK	0x7a
-#define BD70528_REG_INT_RTC_MASK	0x7b
-#define BD70528_REG_INT_GPIO_MASK	0x7c
-#define BD70528_REG_INT_OP_FAIL_MASK	0x7d
-
-/* Reset related 'magic' registers */
-#define BD70528_REG_SHIPMODE	0x03
-#define BD70528_REG_HWRESET	0x04
-#define BD70528_REG_WARMRESET	0x05
-#define BD70528_REG_STANDBY	0x06
-
-/* GPIO registers */
-#define BD70528_REG_GPIO_STATE	0x8F
-
-#define BD70528_REG_GPIO1_IN	0x4d
-#define BD70528_REG_GPIO2_IN	0x4f
-#define BD70528_REG_GPIO3_IN	0x51
-#define BD70528_REG_GPIO4_IN	0x53
-#define BD70528_REG_GPIO1_OUT	0x4e
-#define BD70528_REG_GPIO2_OUT	0x50
-#define BD70528_REG_GPIO3_OUT	0x52
-#define BD70528_REG_GPIO4_OUT	0x54
-
-/* RTC */
-
-#define BD70528_REG_RTC_COUNT_H		0x2d
-#define BD70528_REG_RTC_COUNT_L		0x2e
-#define BD70528_REG_RTC_SEC		0x2f
-#define BD70528_REG_RTC_MINUTE		0x30
-#define BD70528_REG_RTC_HOUR		0x31
-#define BD70528_REG_RTC_WEEK		0x32
-#define BD70528_REG_RTC_DAY		0x33
-#define BD70528_REG_RTC_MONTH		0x34
-#define BD70528_REG_RTC_YEAR		0x35
-
-#define BD70528_REG_RTC_ALM_SEC		0x36
-#define BD70528_REG_RTC_ALM_START	BD70528_REG_RTC_ALM_SEC
-#define BD70528_REG_RTC_ALM_MINUTE	0x37
-#define BD70528_REG_RTC_ALM_HOUR	0x38
-#define BD70528_REG_RTC_ALM_WEEK	0x39
-#define BD70528_REG_RTC_ALM_DAY		0x3a
-#define BD70528_REG_RTC_ALM_MONTH	0x3b
-#define BD70528_REG_RTC_ALM_YEAR	0x3c
-#define BD70528_REG_RTC_ALM_MASK	0x3d
-#define BD70528_REG_RTC_ALM_REPEAT	0x3e
-#define BD70528_REG_RTC_START		BD70528_REG_RTC_SEC
-
-#define BD70528_REG_RTC_WAKE_SEC	0x43
-#define BD70528_REG_RTC_WAKE_START	BD70528_REG_RTC_WAKE_SEC
-#define BD70528_REG_RTC_WAKE_MIN	0x44
-#define BD70528_REG_RTC_WAKE_HOUR	0x45
-#define BD70528_REG_RTC_WAKE_CTRL	0x46
-
-#define BD70528_REG_ELAPSED_TIMER_EN	0x42
-#define BD70528_REG_WAKE_EN		0x46
-
-/* WDT registers */
-#define BD70528_REG_WDT_CTRL		0x4A
-#define BD70528_REG_WDT_HOUR		0x49
-#define BD70528_REG_WDT_MINUTE		0x48
-#define BD70528_REG_WDT_SEC		0x47
-
-/* Charger / Battery */
-#define BD70528_REG_CHG_CURR_STAT	0x59
-#define BD70528_REG_CHG_BAT_STAT	0x57
-#define BD70528_REG_CHG_BAT_TEMP	0x58
-#define BD70528_REG_CHG_IN_STAT		0x56
-#define BD70528_REG_CHG_DCIN_ILIM	0x5d
-#define BD70528_REG_CHG_CHG_CURR_WARM	0x61
-#define BD70528_REG_CHG_CHG_CURR_COLD	0x62
-
-/* Masks for main IRQ register bits */
-enum {
-	BD70528_INT_SHDN,
-#define BD70528_INT_SHDN_MASK BIT(BD70528_INT_SHDN)
-	BD70528_INT_PWR_FLT,
-#define BD70528_INT_PWR_FLT_MASK BIT(BD70528_INT_PWR_FLT)
-	BD70528_INT_VR_FLT,
-#define BD70528_INT_VR_FLT_MASK BIT(BD70528_INT_VR_FLT)
-	BD70528_INT_MISC,
-#define BD70528_INT_MISC_MASK BIT(BD70528_INT_MISC)
-	BD70528_INT_BAT1,
-#define BD70528_INT_BAT1_MASK BIT(BD70528_INT_BAT1)
-	BD70528_INT_RTC,
-#define BD70528_INT_RTC_MASK BIT(BD70528_INT_RTC)
-	BD70528_INT_GPIO,
-#define BD70528_INT_GPIO_MASK BIT(BD70528_INT_GPIO)
-	BD70528_INT_OP_FAIL,
-#define BD70528_INT_OP_FAIL_MASK BIT(BD70528_INT_OP_FAIL)
-};
-
-/* IRQs */
-enum {
-	/* Shutdown register IRQs */
-	BD70528_INT_LONGPUSH,
-	BD70528_INT_WDT,
-	BD70528_INT_HWRESET,
-	BD70528_INT_RSTB_FAULT,
-	BD70528_INT_VBAT_UVLO,
-	BD70528_INT_TSD,
-	BD70528_INT_RSTIN,
-	/* Power failure register IRQs */
-	BD70528_INT_BUCK1_FAULT,
-	BD70528_INT_BUCK2_FAULT,
-	BD70528_INT_BUCK3_FAULT,
-	BD70528_INT_LDO1_FAULT,
-	BD70528_INT_LDO2_FAULT,
-	BD70528_INT_LDO3_FAULT,
-	BD70528_INT_LED1_FAULT,
-	BD70528_INT_LED2_FAULT,
-	/* VR FAULT register IRQs */
-	BD70528_INT_BUCK1_OCP,
-	BD70528_INT_BUCK2_OCP,
-	BD70528_INT_BUCK3_OCP,
-	BD70528_INT_LED1_OCP,
-	BD70528_INT_LED2_OCP,
-	BD70528_INT_BUCK1_FULLON,
-	BD70528_INT_BUCK2_FULLON,
-	/* PMU register interrupts */
-	BD70528_INT_SHORTPUSH,
-	BD70528_INT_AUTO_WAKEUP,
-	BD70528_INT_STATE_CHANGE,
-	/* Charger 1 register IRQs */
-	BD70528_INT_BAT_OV_RES,
-	BD70528_INT_BAT_OV_DET,
-	BD70528_INT_DBAT_DET,
-	BD70528_INT_BATTSD_COLD_RES,
-	BD70528_INT_BATTSD_COLD_DET,
-	BD70528_INT_BATTSD_HOT_RES,
-	BD70528_INT_BATTSD_HOT_DET,
-	BD70528_INT_CHG_TSD,
-	/* Charger 2 register IRQs */
-	BD70528_INT_BAT_RMV,
-	BD70528_INT_BAT_DET,
-	BD70528_INT_DCIN2_OV_RES,
-	BD70528_INT_DCIN2_OV_DET,
-	BD70528_INT_DCIN2_RMV,
-	BD70528_INT_DCIN2_DET,
-	BD70528_INT_DCIN1_RMV,
-	BD70528_INT_DCIN1_DET,
-	/* RTC register IRQs */
-	BD70528_INT_RTC_ALARM,
-	BD70528_INT_ELPS_TIM,
-	/* GPIO register IRQs */
-	BD70528_INT_GPIO0,
-	BD70528_INT_GPIO1,
-	BD70528_INT_GPIO2,
-	BD70528_INT_GPIO3,
-	/* Invalid operation register IRQs */
-	BD70528_INT_BUCK1_DVS_OPFAIL,
-	BD70528_INT_BUCK2_DVS_OPFAIL,
-	BD70528_INT_BUCK3_DVS_OPFAIL,
-	BD70528_INT_LED1_VOLT_OPFAIL,
-	BD70528_INT_LED2_VOLT_OPFAIL,
-};
-
-/* Masks */
-#define BD70528_INT_LONGPUSH_MASK 0x1
-#define BD70528_INT_WDT_MASK 0x2
-#define BD70528_INT_HWRESET_MASK 0x4
-#define BD70528_INT_RSTB_FAULT_MASK 0x8
-#define BD70528_INT_VBAT_UVLO_MASK 0x10
-#define BD70528_INT_TSD_MASK 0x20
-#define BD70528_INT_RSTIN_MASK 0x40
-
-#define BD70528_INT_BUCK1_FAULT_MASK 0x1
-#define BD70528_INT_BUCK2_FAULT_MASK 0x2
-#define BD70528_INT_BUCK3_FAULT_MASK 0x4
-#define BD70528_INT_LDO1_FAULT_MASK 0x8
-#define BD70528_INT_LDO2_FAULT_MASK 0x10
-#define BD70528_INT_LDO3_FAULT_MASK 0x20
-#define BD70528_INT_LED1_FAULT_MASK 0x40
-#define BD70528_INT_LED2_FAULT_MASK 0x80
-
-#define BD70528_INT_BUCK1_OCP_MASK 0x1
-#define BD70528_INT_BUCK2_OCP_MASK 0x2
-#define BD70528_INT_BUCK3_OCP_MASK 0x4
-#define BD70528_INT_LED1_OCP_MASK 0x8
-#define BD70528_INT_LED2_OCP_MASK 0x10
-#define BD70528_INT_BUCK1_FULLON_MASK 0x20
-#define BD70528_INT_BUCK2_FULLON_MASK 0x40
-
-#define BD70528_INT_SHORTPUSH_MASK 0x1
-#define BD70528_INT_AUTO_WAKEUP_MASK 0x2
-#define BD70528_INT_STATE_CHANGE_MASK 0x10
-
-#define BD70528_INT_BAT_OV_RES_MASK 0x1
-#define BD70528_INT_BAT_OV_DET_MASK 0x2
-#define BD70528_INT_DBAT_DET_MASK 0x4
-#define BD70528_INT_BATTSD_COLD_RES_MASK 0x8
-#define BD70528_INT_BATTSD_COLD_DET_MASK 0x10
-#define BD70528_INT_BATTSD_HOT_RES_MASK 0x20
-#define BD70528_INT_BATTSD_HOT_DET_MASK 0x40
-#define BD70528_INT_CHG_TSD_MASK 0x80
-
-#define BD70528_INT_BAT_RMV_MASK 0x1
-#define BD70528_INT_BAT_DET_MASK 0x2
-#define BD70528_INT_DCIN2_OV_RES_MASK 0x4
-#define BD70528_INT_DCIN2_OV_DET_MASK 0x8
-#define BD70528_INT_DCIN2_RMV_MASK 0x10
-#define BD70528_INT_DCIN2_DET_MASK 0x20
-#define BD70528_INT_DCIN1_RMV_MASK 0x40
-#define BD70528_INT_DCIN1_DET_MASK 0x80
-
-#define BD70528_INT_RTC_ALARM_MASK 0x1
-#define BD70528_INT_ELPS_TIM_MASK 0x2
-
-#define BD70528_INT_GPIO0_MASK 0x1
-#define BD70528_INT_GPIO1_MASK 0x2
-#define BD70528_INT_GPIO2_MASK 0x4
-#define BD70528_INT_GPIO3_MASK 0x8
-
-#define BD70528_INT_BUCK1_DVS_OPFAIL_MASK 0x1
-#define BD70528_INT_BUCK2_DVS_OPFAIL_MASK 0x2
-#define BD70528_INT_BUCK3_DVS_OPFAIL_MASK 0x4
-#define BD70528_INT_LED1_VOLT_OPFAIL_MASK 0x10
-#define BD70528_INT_LED2_VOLT_OPFAIL_MASK 0x20
-
-#define BD70528_DEBOUNCE_MASK 0x3
-
-#define BD70528_DEBOUNCE_DISABLE 0
-#define BD70528_DEBOUNCE_15MS 1
-#define BD70528_DEBOUNCE_30MS 2
-#define BD70528_DEBOUNCE_50MS 3
-
-#define BD70528_GPIO_DRIVE_MASK 0x2
-#define BD70528_GPIO_PUSH_PULL 0x0
-#define BD70528_GPIO_OPEN_DRAIN 0x2
-
-#define BD70528_GPIO_OUT_EN_MASK 0x80
-#define BD70528_GPIO_OUT_ENABLE 0x80
-#define BD70528_GPIO_OUT_DISABLE 0x0
-
-#define BD70528_GPIO_OUT_HI 0x1
-#define BD70528_GPIO_OUT_LO 0x0
-#define BD70528_GPIO_OUT_MASK 0x1
-
-#define BD70528_GPIO_IN_STATE_BASE 1
-
-/* RTC masks to mask out reserved bits */
-
-#define BD70528_MASK_ELAPSED_TIMER_EN	0x1
-/* Mask second, min and hour fields
- * HW would support ALM irq for over 24h
- * (by setting day, month and year too)
- * but as we wish to keep this same as for
- * wake-up we limit ALM to 24H and only
- * unmask sec, min and hour
- */
-#define BD70528_MASK_WAKE_EN		0x1
-
-/* WDT masks */
-#define BD70528_MASK_WDT_EN		0x1
-#define BD70528_MASK_WDT_HOUR		0x1
-#define BD70528_MASK_WDT_MINUTE		0x7f
-#define BD70528_MASK_WDT_SEC		0x7f
-
-#define BD70528_WDT_STATE_BIT		0x1
-#define BD70528_ELAPSED_STATE_BIT	0x2
-#define BD70528_WAKE_STATE_BIT		0x4
-
-/* Charger masks */
-#define BD70528_MASK_CHG_STAT		0x7f
-#define BD70528_MASK_CHG_BAT_TIMER	0x20
-#define BD70528_MASK_CHG_BAT_OVERVOLT	0x10
-#define BD70528_MASK_CHG_BAT_DETECT	0x1
-#define BD70528_MASK_CHG_DCIN1_UVLO	0x1
-#define BD70528_MASK_CHG_DCIN_ILIM	0x3f
-#define BD70528_MASK_CHG_CHG_CURR	0x1f
-#define BD70528_MASK_CHG_TRICKLE_CURR	0x10
-
-/*
- * Note, external battery register is the lonely rider at
- * address 0xc5. See how to stuff that in the regmap
- */
-#define BD70528_MAX_REGISTER 0x94
-
-/* Buck control masks */
-#define BD70528_MASK_RUN_EN	0x4
-#define BD70528_MASK_STBY_EN	0x2
-#define BD70528_MASK_IDLE_EN	0x1
-#define BD70528_MASK_LED1_EN	0x1
-#define BD70528_MASK_LED2_EN	0x10
-
-#define BD70528_MASK_BUCK_VOLT	0xf
-#define BD70528_MASK_LDO_VOLT	0x1f
-#define BD70528_MASK_LED1_VOLT	0x1
-#define BD70528_MASK_LED2_VOLT	0x10
-
-/* Misc irq masks */
-#define BD70528_INT_MASK_SHORT_PUSH	1
-#define BD70528_INT_MASK_AUTO_WAKE	2
-#define BD70528_INT_MASK_POWER_STATE	4
-
-#define BD70528_MASK_BUCK_RAMP 0x10
-#define BD70528_SIFT_BUCK_RAMP 4
-
-#if IS_ENABLED(CONFIG_BD70528_WATCHDOG)
-
-int bd70528_wdt_set(struct rohm_regmap_dev *data, int enable, int *old_state);
-void bd70528_wdt_lock(struct rohm_regmap_dev *data);
-void bd70528_wdt_unlock(struct rohm_regmap_dev *data);
-
-#else /* CONFIG_BD70528_WATCHDOG */
-
-static inline int bd70528_wdt_set(struct rohm_regmap_dev *data, int enable,
-				  int *old_state)
-{
-	return 0;
-}
-
-static inline void bd70528_wdt_lock(struct rohm_regmap_dev *data)
-{
-}
-
-static inline void bd70528_wdt_unlock(struct rohm_regmap_dev *data)
-{
-}
-
-#endif /* CONFIG_BD70528_WATCHDOG */
-
-#endif /* __LINUX_MFD_BD70528_H__ */
diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h
index 35b392a0d73a..8fb763a2265a 100644
--- a/include/linux/mfd/rohm-generic.h
+++ b/include/linux/mfd/rohm-generic.h
@@ -12,7 +12,6 @@ enum rohm_chip_type {
 	ROHM_CHIP_TYPE_BD9573,
 	ROHM_CHIP_TYPE_BD9574,
 	ROHM_CHIP_TYPE_BD9576,
-	ROHM_CHIP_TYPE_BD70528,
 	ROHM_CHIP_TYPE_BD71815,
 	ROHM_CHIP_TYPE_BD71828,
 	ROHM_CHIP_TYPE_BD71837,
-- 
cgit v1.2.3


From dacb5d8875cc6cd3a553363b4d6f06760fcbe70c Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 26 Nov 2021 19:34:21 +0100
Subject: tcp: fix page frag corruption on page fault

Steffen reported a TCP stream corruption for HTTP requests
served by the apache web-server using a cifs mount-point
and memory mapping the relevant file.

The root cause is quite similar to the one addressed by
commit 20eb4f29b602 ("net: fix sk_page_frag() recursion from
memory reclaim"). Here the nested access to the task page frag
is caused by a page fault on the (mmapped) user-space memory
buffer coming from the cifs file.

The page fault handler performs an smb transaction on a different
socket, inside the same process context. Since sk->sk_allaction
for such socket does not prevent the usage for the task_frag,
the nested allocation modify "under the hood" the page frag
in use by the outer sendmsg call, corrupting the stream.

The overall relevant stack trace looks like the following:

httpd 78268 [001] 3461630.850950:      probe:tcp_sendmsg_locked:
        ffffffff91461d91 tcp_sendmsg_locked+0x1
        ffffffff91462b57 tcp_sendmsg+0x27
        ffffffff9139814e sock_sendmsg+0x3e
        ffffffffc06dfe1d smb_send_kvec+0x28
        [...]
        ffffffffc06cfaf8 cifs_readpages+0x213
        ffffffff90e83c4b read_pages+0x6b
        ffffffff90e83f31 __do_page_cache_readahead+0x1c1
        ffffffff90e79e98 filemap_fault+0x788
        ffffffff90eb0458 __do_fault+0x38
        ffffffff90eb5280 do_fault+0x1a0
        ffffffff90eb7c84 __handle_mm_fault+0x4d4
        ffffffff90eb8093 handle_mm_fault+0xc3
        ffffffff90c74f6d __do_page_fault+0x1ed
        ffffffff90c75277 do_page_fault+0x37
        ffffffff9160111e page_fault+0x1e
        ffffffff9109e7b5 copyin+0x25
        ffffffff9109eb40 _copy_from_iter_full+0xe0
        ffffffff91462370 tcp_sendmsg_locked+0x5e0
        ffffffff91462370 tcp_sendmsg_locked+0x5e0
        ffffffff91462b57 tcp_sendmsg+0x27
        ffffffff9139815c sock_sendmsg+0x4c
        ffffffff913981f7 sock_write_iter+0x97
        ffffffff90f2cc56 do_iter_readv_writev+0x156
        ffffffff90f2dff0 do_iter_write+0x80
        ffffffff90f2e1c3 vfs_writev+0xa3
        ffffffff90f2e27c do_writev+0x5c
        ffffffff90c042bb do_syscall_64+0x5b
        ffffffff916000ad entry_SYSCALL_64_after_hwframe+0x65

The cifs filesystem rightfully sets sk_allocations to GFP_NOFS,
we can avoid the nesting using the sk page frag for allocation
lacking the __GFP_FS flag. Do not define an additional mm-helper
for that, as this is strictly tied to the sk page frag usage.

v1 -> v2:
 - use a stricted sk_page_frag() check instead of reordering the
   code (Eric)

Reported-by: Steffen Froemer <sfroemer@redhat.com>
Fixes: 5640f7685831 ("net: use a per task frag allocator")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index b32906e1ab55..715cdb4b2b79 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2430,19 +2430,22 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk)
  * @sk: socket
  *
  * Use the per task page_frag instead of the per socket one for
- * optimization when we know that we're in the normal context and owns
+ * optimization when we know that we're in process context and own
  * everything that's associated with %current.
  *
- * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
- * inside other socket operations and end up recursing into sk_page_frag()
- * while it's already in use.
+ * Both direct reclaim and page faults can nest inside other
+ * socket operations and end up recursing into sk_page_frag()
+ * while it's already in use: explicitly avoid task page_frag
+ * usage if the caller is potentially doing any of them.
+ * This assumes that page fault handlers use the GFP_NOFS flags.
  *
  * Return: a per task page_frag if context allows that,
  * otherwise a per socket one.
  */
 static inline struct page_frag *sk_page_frag(struct sock *sk)
 {
-	if (gfpflags_normal_context(sk->sk_allocation))
+	if ((sk->sk_allocation & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC | __GFP_FS)) ==
+	    (__GFP_DIRECT_RECLAIM | __GFP_FS))
 		return &current->task_frag;
 
 	return &sk->sk_frag;
-- 
cgit v1.2.3


From 8544f08c816292c2219f28c6eaa69236b978bfb9 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 16 Nov 2021 16:45:12 +0900
Subject: ASoC: soc-dai: update snd_soc_dai_delay() to snd_soc_pcm_dai_delay()

Current soc_pcm_pointer() is manually calculating
both CPU-DAI's   max delay (= A)
and  Codec-DAI's max delay (= B).

	static snd_pcm_uframes_t soc_pcm_pointer(...)
	{
		...
 ^		for_each_rtd_cpu_dais(rtd, i, cpu_dai)
(A)			cpu_delay = max(cpu_delay, ...);
 v		delay += cpu_delay;

 ^		for_each_rtd_codec_dais(rtd, i, codec_dai)
(B)			codec_delay = max(codec_delay, ...);
 v		delay += codec_delay;

		runtime->delay = delay;
		...
	}

Current soc_pcm_pointer() and the total delay calculating
is not readable / difficult to understand.

This patch update snd_soc_dai_delay() to snd_soc_pcm_dai_delay(),
and calcule both CPU/Codec delay in one function.

Link: https://lore.kernel.org/r/87fszl4yrq.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/875yssy25z.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 0dcb361a98bb..5d4dd7c5450b 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -208,8 +208,6 @@ int snd_soc_dai_startup(struct snd_soc_dai *dai,
 			struct snd_pcm_substream *substream);
 void snd_soc_dai_shutdown(struct snd_soc_dai *dai,
 			  struct snd_pcm_substream *substream, int rollback);
-snd_pcm_sframes_t snd_soc_dai_delay(struct snd_soc_dai *dai,
-				    struct snd_pcm_substream *substream);
 void snd_soc_dai_suspend(struct snd_soc_dai *dai);
 void snd_soc_dai_resume(struct snd_soc_dai *dai);
 int snd_soc_dai_compress_new(struct snd_soc_dai *dai,
@@ -238,6 +236,8 @@ int snd_soc_pcm_dai_trigger(struct snd_pcm_substream *substream, int cmd,
 			    int rollback);
 int snd_soc_pcm_dai_bespoke_trigger(struct snd_pcm_substream *substream,
 				    int cmd);
+void snd_soc_pcm_dai_delay(struct snd_pcm_substream *substream,
+			   snd_pcm_sframes_t *cpu_delay, snd_pcm_sframes_t *codec_delay);
 
 int snd_soc_dai_compr_startup(struct snd_soc_dai *dai,
 			      struct snd_compr_stream *cstream);
-- 
cgit v1.2.3


From 403f830e7a0be5a9e33c7a9d208574f79887ec57 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 16 Nov 2021 16:45:18 +0900
Subject: ASoC: soc-component: add snd_soc_pcm_component_delay()

Current soc-pcm.c :: soc_pcm_pointer() is assuming that
component driver might update runtime->delay silently in
snd_soc_pcm_component_pointer() (= A).

	static snd_pcm_uframes_t soc_pcm_pointer(...)
	{
		...

		/* clearing the previous total delay */
=>		runtime->delay = 0;

(A)		offset = snd_soc_pcm_component_pointer(substream);

		/* base delay if assigned in pointer callback */
=>		delay = runtime->delay;
		...
	}

1) The behavior that ".pointer callback secretly updates
   runtime->delay" is strange and confusable.

2) Current snd_soc_pcm_component_pointer() uses 1st found component's
   .pointer callback only, thus it is no problem for now.
   But runtime->delay might be overwrote if it adjusted to multiple
   components in the future.

3) Component delay is updated at .pointer callback timing (secretly).
   But some components which doesn't have .pointer callback might want
   to increase runtime->delay for some reasons.

We already have .delay function for DAI, but not have for Component.
This patch adds new snd_soc_pcm_component_delay() for it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/874k8cy25t.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index a4317144ab62..a52080407b98 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -148,6 +148,8 @@ struct snd_soc_component_driver {
 		    struct vm_area_struct *vma);
 	int (*ack)(struct snd_soc_component *component,
 		   struct snd_pcm_substream *substream);
+	snd_pcm_sframes_t (*delay)(struct snd_soc_component *component,
+				   struct snd_pcm_substream *substream);
 
 	const struct snd_compress_ops *compress_ops;
 
@@ -505,5 +507,7 @@ int snd_soc_pcm_component_pm_runtime_get(struct snd_soc_pcm_runtime *rtd,
 void snd_soc_pcm_component_pm_runtime_put(struct snd_soc_pcm_runtime *rtd,
 					  void *stream, int rollback);
 int snd_soc_pcm_component_ack(struct snd_pcm_substream *substream);
+void snd_soc_pcm_component_delay(struct snd_pcm_substream *substream,
+				 snd_pcm_sframes_t *cpu_delay, snd_pcm_sframes_t *codec_delay);
 
 #endif /* __SOC_COMPONENT_H */
-- 
cgit v1.2.3


From ed618bd80947fa8d9644baf8ac18cb2a02223a5e Mon Sep 17 00:00:00 2001
From: Hao Chen <chenhao288@hisilicon.com>
Date: Sat, 27 Nov 2021 17:34:04 +0800
Subject: net: vxlan: add macro definition for number of IANA VXLAN-GPE port

Add macro definition for number of IANA VXLAN-GPE port for generic use.

Signed-off-by: Hao Chen <chenhao288@hisilicon.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/vxlan.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 08537aa14f7c..5a934bebe630 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -10,6 +10,7 @@
 #include <net/nexthop.h>
 
 #define IANA_VXLAN_UDP_PORT     4789
+#define IANA_VXLAN_GPE_UDP_PORT 4790
 
 /* VXLAN protocol (RFC 7348) header:
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-- 
cgit v1.2.3


From a9c8f68ce2c37ced2f7a8667eda71b7753ede398 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 23 Nov 2021 21:27:22 +0200
Subject: spi: pxa2xx: Get rid of unused ->cs_control()

Since the last user of the custom ->cs_control() gone, we may get rid of
this legacy API completely.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20211123192723.44537-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/pxa2xx_spi.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index eaab121ee575..42e06bfbc2a4 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -9,9 +9,6 @@
 
 #include <linux/pxa2xx_ssp.h>
 
-#define PXA2XX_CS_ASSERT (0x01)
-#define PXA2XX_CS_DEASSERT (0x02)
-
 struct dma_chan;
 
 /*
@@ -47,7 +44,6 @@ struct pxa2xx_spi_chip {
 	u32 timeout;
 	u8 enable_loopback;
 	int gpio_cs;
-	void (*cs_control)(u32 command);
 };
 
 #if defined(CONFIG_ARCH_PXA) || defined(CONFIG_ARCH_MMP)
-- 
cgit v1.2.3


From 8393961c53b31078cfc877bc00eb0f67e1474edd Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 23 Nov 2021 21:27:23 +0200
Subject: spi: pxa2xx: Get rid of unused enable_loopback member

There is no user of the enable_loopback member in the struct pxa2xx_spi_chip.
Remote this legacy member completely.

The mentioned in the documentation the testing phase can be performed with
spidev_test tool.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20211123192723.44537-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/pxa2xx_spi.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index 42e06bfbc2a4..ca74dce36706 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -42,7 +42,6 @@ struct pxa2xx_spi_chip {
 	u8 rx_threshold;
 	u8 dma_burst_size;
 	u32 timeout;
-	u8 enable_loopback;
 	int gpio_cs;
 };
 
-- 
cgit v1.2.3


From b99658452355d316debee11079e8f1c6c1029355 Mon Sep 17 00:00:00 2001
From: Colin Foster <colin.foster@in-advantage.com>
Date: Sun, 28 Nov 2021 17:57:37 -0800
Subject: net: dsa: ocelot: felix: utilize shared mscc-miim driver for indirect
 MDIO access

Switch to a shared MDIO access implementation by way of the mdio-mscc-miim
driver.

Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
Tested-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio/mdio-mscc-miim.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 include/linux/mdio/mdio-mscc-miim.h

(limited to 'include')

diff --git a/include/linux/mdio/mdio-mscc-miim.h b/include/linux/mdio/mdio-mscc-miim.h
new file mode 100644
index 000000000000..5b4ed2c3cbb9
--- /dev/null
+++ b/include/linux/mdio/mdio-mscc-miim.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Driver for the MDIO interface of Microsemi network switches.
+ *
+ * Author: Colin Foster <colin.foster@in-advantage.com>
+ * Copyright (C) 2021 Innovative Advantage
+ */
+#ifndef MDIO_MSCC_MIIM_H
+#define MDIO_MSCC_MIIM_H
+
+#include <linux/device.h>
+#include <linux/phy.h>
+#include <linux/regmap.h>
+
+int mscc_miim_setup(struct device *device, struct mii_bus **bus,
+		    const char *name, struct regmap *mii_regmap,
+		    int status_offset);
+
+#endif
-- 
cgit v1.2.3


From aeeecb889165617a841e939117f9a8095d0e7d80 Mon Sep 17 00:00:00 2001
From: Menglong Dong <imagedong@tencent.com>
Date: Sun, 28 Nov 2021 14:01:02 +0800
Subject: net: snmp: add statistics for tcp small queue check

Once tcp small queue check failed in tcp_small_queue_check(), the
throughput of tcp will be limited, and it's hard to distinguish
whether it is out of tcp congestion control.

Add statistics of LINUX_MIB_TCPSMALLQUEUEFAILURE for this scene.

Signed-off-by: Menglong Dong <imagedong@tencent.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/snmp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 904909d020e2..e32ec6932e82 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -292,6 +292,7 @@ enum
 	LINUX_MIB_TCPDSACKIGNOREDDUBIOUS,	/* TCPDSACKIgnoredDubious */
 	LINUX_MIB_TCPMIGRATEREQSUCCESS,		/* TCPMigrateReqSuccess */
 	LINUX_MIB_TCPMIGRATEREQFAILURE,		/* TCPMigrateReqFailure */
+	LINUX_MIB_TCPSMALLQUEUEFAILURE,		/* TCPSmallQueueFailure */
 	__LINUX_MIB_MAX
 };
 
-- 
cgit v1.2.3


From 17247821ae9b40ea6df8d771cfca97d91675be93 Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dariobin@libero.it>
Date: Thu, 25 Nov 2021 23:46:42 +0100
Subject: mfd: ti_am335x_tscadc: Drop the CNTRLREG_TSC_8WIRE macro

In TI's reference manual description for the `AFE_Pen_Ctrl' bit-field
of the TSC's CTRL register, there is no mention of 8-wire touchscreens.
Even commit f0933a60d190 ("mfd: ti_am335x_tscadc: Update logic in CTRL
register for 5-wire TS") says that the value of this bit-field must be
the same for 4-wire and 8-wire touchscreens. So let's remove the
CNTRLREG_TSC_8WIRE macro to avoid misunderstandings.

Signed-off-by: Dario Binacchi <dariobin@libero.it>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211125224642.21011-5-dariobin@libero.it
---
 include/linux/mfd/ti_am335x_tscadc.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index ba13e043d910..4063b0614d90 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -103,7 +103,6 @@
 #define CNTRLREG_TSC_AFE_CTRL(val) FIELD_PREP(GENMASK(6, 5), (val))
 #define CNTRLREG_TSC_4WIRE	CNTRLREG_TSC_AFE_CTRL(1)
 #define CNTRLREG_TSC_5WIRE	CNTRLREG_TSC_AFE_CTRL(2)
-#define CNTRLREG_TSC_8WIRE	CNTRLREG_TSC_AFE_CTRL(3)
 #define CNTRLREG_TSC_ENB	BIT(7)
 
 /*Control registers bitfields  for MAGADC IP */
-- 
cgit v1.2.3


From 79478bf9ea9fa48d30836afa796ac13d8a0f320b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 17 Nov 2021 07:13:54 +0100
Subject: block: move blk_rq_err_bytes to scsi

blk_rq_err_bytes is only used by the scsi midlayer, so move it there.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20211117061404.331732-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2949d9ac7484..a78d9a0f2a1b 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -947,7 +947,6 @@ struct req_iterator {
  * blk_rq_pos()			: the current sector
  * blk_rq_bytes()		: bytes left in the entire request
  * blk_rq_cur_bytes()		: bytes left in the current segment
- * blk_rq_err_bytes()		: bytes left till the next error boundary
  * blk_rq_sectors()		: sectors left in the entire request
  * blk_rq_cur_sectors()		: sectors left in the current segment
  * blk_rq_stats_sectors()	: sectors of the entire request used for stats
@@ -971,8 +970,6 @@ static inline int blk_rq_cur_bytes(const struct request *rq)
 	return bio_iovec(rq->bio).bv_len;
 }
 
-unsigned int blk_rq_err_bytes(const struct request *rq);
-
 static inline unsigned int blk_rq_sectors(const struct request *rq)
 {
 	return blk_rq_bytes(rq) >> SECTOR_SHIFT;
-- 
cgit v1.2.3


From 786d4e01c550e8bb7c9f9f23bca0596a2a33483c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 17 Nov 2021 07:13:55 +0100
Subject: block: remove rq_flush_dcache_pages

This function is trivial, and flush_dcache_page is always defined, so
just open code it in the 2.5 callers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20211117061404.331732-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a78d9a0f2a1b..308edc2a4925 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -1132,14 +1132,4 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
 }
 #endif /* CONFIG_BLK_DEV_ZONED */
 
-#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
-# error	"You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
-#endif
-#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
-void rq_flush_dcache_pages(struct request *rq);
-#else
-static inline void rq_flush_dcache_pages(struct request *rq)
-{
-}
-#endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */
 #endif /* BLK_MQ_H */
-- 
cgit v1.2.3


From 86416916466514e4ae0b7296d20133b6427c4c1f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 22 Nov 2021 14:06:12 +0100
Subject: block: move GENHD_FL_NATIVE_CAPACITY to disk->state

The flag to indicate an unlocked native capacity is dynamic state,
not a driver capability flag, so move it to disk->state.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211122130625.1136848-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/genhd.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 74c410263113..e490a71e5e9d 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -60,12 +60,6 @@ struct partition_meta_info {
  * (``BLOCK_EXT_MAJOR``).
  * This affects the maximum number of partitions.
  *
- * ``GENHD_FL_NATIVE_CAPACITY`` (0x0080): based on information in the
- * partition table, the device's capacity has been extended to its
- * native capacity; i.e. the device has hidden capacity used by one
- * of the partitions (this is a flag used so that native capacity is
- * only ever unlocked once).
- *
  * ``GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE`` (0x0100): event polling is
  * blocked whenever a writer holds an exclusive lock.
  *
@@ -86,7 +80,6 @@ struct partition_meta_info {
 #define GENHD_FL_CD				0x0008
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	0x0020
 #define GENHD_FL_EXT_DEVT			0x0040
-#define GENHD_FL_NATIVE_CAPACITY		0x0080
 #define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE	0x0100
 #define GENHD_FL_NO_PART_SCAN			0x0200
 #define GENHD_FL_HIDDEN				0x0400
@@ -140,6 +133,7 @@ struct gendisk {
 #define GD_NEED_PART_SCAN		0
 #define GD_READ_ONLY			1
 #define GD_DEAD				2
+#define GD_NATIVE_CAPACITY		3
 
 	struct mutex open_mutex;	/* open/close mutex */
 	unsigned open_partitions;	/* number of open partitions */
-- 
cgit v1.2.3


From 1545e0b419ba1d9b9bee4061d4826340afe6b0aa Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 22 Nov 2021 14:06:13 +0100
Subject: block: move GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE to disk->event_flags

GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE is all about the event reporting
mechanism, so move it to the event_flags field.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211122130625.1136848-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/genhd.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index e490a71e5e9d..c1136ff3c91f 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -60,9 +60,6 @@ struct partition_meta_info {
  * (``BLOCK_EXT_MAJOR``).
  * This affects the maximum number of partitions.
  *
- * ``GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE`` (0x0100): event polling is
- * blocked whenever a writer holds an exclusive lock.
- *
  * ``GENHD_FL_NO_PART_SCAN`` (0x0200): partition scanning is disabled.
  * Used for loop devices in their default settings and some MMC
  * devices.
@@ -80,7 +77,6 @@ struct partition_meta_info {
 #define GENHD_FL_CD				0x0008
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	0x0020
 #define GENHD_FL_EXT_DEVT			0x0040
-#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE	0x0100
 #define GENHD_FL_NO_PART_SCAN			0x0200
 #define GENHD_FL_HIDDEN				0x0400
 
@@ -94,6 +90,8 @@ enum {
 	DISK_EVENT_FLAG_POLL			= 1 << 0,
 	/* Forward events to udev */
 	DISK_EVENT_FLAG_UEVENT			= 1 << 1,
+	/* Block event polling when open for exclusive write */
+	DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE	= 1 << 2,
 };
 
 struct disk_events;
-- 
cgit v1.2.3


From 1a827ce1b9f2c740d2c6a228afd972970c18bc21 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 22 Nov 2021 14:06:14 +0100
Subject: block: remove GENHD_FL_CD

GENHD_FL_CD marks a gendisk as a vaguely CD-ROM like device.
Besides being used internally inside of sunvdc.c an xen-blkfront it
is used by xen-blkback as a hint to claim a device exported to a
guest is a CD-ROM like device.  Just check for disk->cdi instead
which is the right indicator for "real" CD-ROM or DVD drivers.  This
will miss the paravirtualized guest drivers, but those make little
sense to report anyway.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211122130625.1136848-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/genhd.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index c1136ff3c91f..74518c576fbb 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -46,10 +46,6 @@ struct partition_meta_info {
  * Must not be set for devices which are removed entirely when the
  * media is removed.
  *
- * ``GENHD_FL_CD`` (0x0008): the block device is a CD-ROM-style
- * device.
- * Affects responses to the ``CDROM_GET_CAPABILITY`` ioctl.
- *
  * ``GENHD_FL_SUPPRESS_PARTITION_INFO`` (0x0020): don't include
  * partition information in ``/proc/partitions`` or in the output of
  * printk_all_partitions().
@@ -74,7 +70,6 @@ struct partition_meta_info {
 #define GENHD_FL_REMOVABLE			0x0001
 /* 2 is unused (used to be GENHD_FL_DRIVERFS) */
 /* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */
-#define GENHD_FL_CD				0x0008
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	0x0020
 #define GENHD_FL_EXT_DEVT			0x0040
 #define GENHD_FL_NO_PART_SCAN			0x0200
-- 
cgit v1.2.3


From 46e7eac647b34ed4106a8262f8bedbb90801fadd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 22 Nov 2021 14:06:17 +0100
Subject: block: rename GENHD_FL_NO_PART_SCAN to GENHD_FL_NO_PART

The GENHD_FL_NO_PART_SCAN controls more than just partitions canning,
so rename it to GENHD_FL_NO_PART.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20211122130625.1136848-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/genhd.h | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 74518c576fbb..0b9be3df9489 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -56,15 +56,15 @@ struct partition_meta_info {
  * (``BLOCK_EXT_MAJOR``).
  * This affects the maximum number of partitions.
  *
- * ``GENHD_FL_NO_PART_SCAN`` (0x0200): partition scanning is disabled.
- * Used for loop devices in their default settings and some MMC
- * devices.
+ * ``GENHD_FL_NO_PART`` (0x0200): partition support is disabled.
+ * The kernel will not scan for partitions from add_disk, and users
+ * can't add partitions manually.
  *
  * ``GENHD_FL_HIDDEN`` (0x0400): the block device is hidden; it
  * doesn't produce events, doesn't appear in sysfs, and doesn't have
  * an associated ``bdev``.
  * Implies ``GENHD_FL_SUPPRESS_PARTITION_INFO`` and
- * ``GENHD_FL_NO_PART_SCAN``.
+ * ``GENHD_FL_NO_PART``.
  * Used for multipath devices.
  */
 #define GENHD_FL_REMOVABLE			0x0001
@@ -72,7 +72,7 @@ struct partition_meta_info {
 /* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	0x0020
 #define GENHD_FL_EXT_DEVT			0x0040
-#define GENHD_FL_NO_PART_SCAN			0x0200
+#define GENHD_FL_NO_PART			0x0200
 #define GENHD_FL_HIDDEN				0x0400
 
 enum {
@@ -180,8 +180,7 @@ static inline int disk_max_parts(struct gendisk *disk)
 
 static inline bool disk_part_scan_enabled(struct gendisk *disk)
 {
-	return disk_max_parts(disk) > 1 &&
-		!(disk->flags & GENHD_FL_NO_PART_SCAN);
+	return disk_max_parts(disk) > 1 && !(disk->flags & GENHD_FL_NO_PART);
 }
 
 static inline dev_t disk_devt(struct gendisk *disk)
-- 
cgit v1.2.3


From 3b5149ac50970669ee0ddb9629ec77ffd5c0622d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 22 Nov 2021 14:06:21 +0100
Subject: block: remove GENHD_FL_SUPPRESS_PARTITION_INFO

This flag is not set directly anywhere and only inherited from
GENHD_FL_HIDDEN.  Just check for GENHD_FL_HIDDEN instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211122130625.1136848-11-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/genhd.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 0b9be3df9489..64a2f33ae9ea 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -46,11 +46,6 @@ struct partition_meta_info {
  * Must not be set for devices which are removed entirely when the
  * media is removed.
  *
- * ``GENHD_FL_SUPPRESS_PARTITION_INFO`` (0x0020): don't include
- * partition information in ``/proc/partitions`` or in the output of
- * printk_all_partitions().
- * Used for the null block device and some MMC devices.
- *
  * ``GENHD_FL_EXT_DEVT`` (0x0040): the driver supports extended
  * dynamic ``dev_t``, i.e. it wants extended device numbers
  * (``BLOCK_EXT_MAJOR``).
@@ -63,14 +58,12 @@ struct partition_meta_info {
  * ``GENHD_FL_HIDDEN`` (0x0400): the block device is hidden; it
  * doesn't produce events, doesn't appear in sysfs, and doesn't have
  * an associated ``bdev``.
- * Implies ``GENHD_FL_SUPPRESS_PARTITION_INFO`` and
- * ``GENHD_FL_NO_PART``.
+ * Implies ``GENHD_FL_NO_PART``.
  * Used for multipath devices.
  */
 #define GENHD_FL_REMOVABLE			0x0001
 /* 2 is unused (used to be GENHD_FL_DRIVERFS) */
 /* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */
-#define GENHD_FL_SUPPRESS_PARTITION_INFO	0x0020
 #define GENHD_FL_EXT_DEVT			0x0040
 #define GENHD_FL_NO_PART			0x0200
 #define GENHD_FL_HIDDEN				0x0400
-- 
cgit v1.2.3


From 1ebe2e5f9d68e94c524aba876f27b945669a7879 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 22 Nov 2021 14:06:22 +0100
Subject: block: remove GENHD_FL_EXT_DEVT

All modern drivers can support extra partitions using the extended
dev_t.  In fact except for the ioctl method drivers never even see
partitions in normal operation.

So remove the GENHD_FL_EXT_DEVT and allow extra partitions for all
block devices that do support partitions, and require those that
do not support partitions to explicit disallow them using
GENHD_FL_NO_PART.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211122130625.1136848-12-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/genhd.h | 28 +++++-----------------------
 1 file changed, 5 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 64a2f33ae9ea..b8ced80178d6 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -46,11 +46,6 @@ struct partition_meta_info {
  * Must not be set for devices which are removed entirely when the
  * media is removed.
  *
- * ``GENHD_FL_EXT_DEVT`` (0x0040): the driver supports extended
- * dynamic ``dev_t``, i.e. it wants extended device numbers
- * (``BLOCK_EXT_MAJOR``).
- * This affects the maximum number of partitions.
- *
  * ``GENHD_FL_NO_PART`` (0x0200): partition support is disabled.
  * The kernel will not scan for partitions from add_disk, and users
  * can't add partitions manually.
@@ -64,7 +59,6 @@ struct partition_meta_info {
 #define GENHD_FL_REMOVABLE			0x0001
 /* 2 is unused (used to be GENHD_FL_DRIVERFS) */
 /* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */
-#define GENHD_FL_EXT_DEVT			0x0040
 #define GENHD_FL_NO_PART			0x0200
 #define GENHD_FL_HIDDEN				0x0400
 
@@ -94,13 +88,13 @@ struct blk_integrity {
 };
 
 struct gendisk {
-	/* major, first_minor and minors are input parameters only,
-	 * don't use directly.  Use disk_devt() and disk_max_parts().
+	/*
+	 * major/first_minor/minors should not be set by any new driver, the
+	 * block core will take care of allocating them automatically.
 	 */
-	int major;			/* major number of driver */
+	int major;
 	int first_minor;
-	int minors;                     /* maximum number of minors, =1 for
-                                         * disks that can't be partitioned. */
+	int minors;
 
 	char disk_name[DISK_NAME_LEN];	/* name of major driver */
 
@@ -164,18 +158,6 @@ static inline bool disk_live(struct gendisk *disk)
 #define disk_to_cdi(disk)	NULL
 #endif
 
-static inline int disk_max_parts(struct gendisk *disk)
-{
-	if (disk->flags & GENHD_FL_EXT_DEVT)
-		return DISK_MAX_PARTS;
-	return disk->minors;
-}
-
-static inline bool disk_part_scan_enabled(struct gendisk *disk)
-{
-	return disk_max_parts(disk) > 1 && !(disk->flags & GENHD_FL_NO_PART);
-}
-
 static inline dev_t disk_devt(struct gendisk *disk)
 {
 	return MKDEV(disk->major, disk->first_minor);
-- 
cgit v1.2.3


From 430cc5d3ab4d0ba0bd011cfbb0035e46ba92920c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 22 Nov 2021 14:06:24 +0100
Subject: block: cleanup the GENHD_FL_* definitions

Switch to an enum and tidy up the documentation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211122130625.1136848-14-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/genhd.h | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index b8ced80178d6..6906a45bc761 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -39,28 +39,24 @@ struct partition_meta_info {
 /**
  * DOC: genhd capability flags
  *
- * ``GENHD_FL_REMOVABLE`` (0x0001): indicates that the block device
- * gives access to removable media.
- * When set, the device remains present even when media is not
- * inserted.
- * Must not be set for devices which are removed entirely when the
+ * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to
+ * removable media.  When set, the device remains present even when media is not
+ * inserted.  Shall not be set for devices which are removed entirely when the
  * media is removed.
  *
- * ``GENHD_FL_NO_PART`` (0x0200): partition support is disabled.
- * The kernel will not scan for partitions from add_disk, and users
- * can't add partitions manually.
+ * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events,
+ * doesn't appear in sysfs, and can't be opened from userspace or using
+ * blkdev_get*. Used for the underlying components of multipath devices.
+ *
+ * ``GENHD_FL_NO_PART``: partition support is disabled.  The kernel will not
+ * scan for partitions from add_disk, and users can't add partitions manually.
  *
- * ``GENHD_FL_HIDDEN`` (0x0400): the block device is hidden; it
- * doesn't produce events, doesn't appear in sysfs, and doesn't have
- * an associated ``bdev``.
- * Implies ``GENHD_FL_NO_PART``.
- * Used for multipath devices.
  */
-#define GENHD_FL_REMOVABLE			0x0001
-/* 2 is unused (used to be GENHD_FL_DRIVERFS) */
-/* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */
-#define GENHD_FL_NO_PART			0x0200
-#define GENHD_FL_HIDDEN				0x0400
+enum {
+	GENHD_FL_REMOVABLE			= 1 << 0,
+	GENHD_FL_HIDDEN				= 1 << 1,
+	GENHD_FL_NO_PART			= 1 << 2,
+};
 
 enum {
 	DISK_EVENT_MEDIA_CHANGE			= 1 << 0, /* media changed */
-- 
cgit v1.2.3


From 48b5c1fbcd8c5bc6b91a56399a5257b801391dd8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 13 Nov 2021 14:03:26 -0700
Subject: block: only allocate poll_stats if there's a user of them

This is essentially never used, yet it's about 1/3rd of the total
queue size. Allocate it when needed, and don't embed it in the queue.

Kill the queue flag for this while at it, since we can just check the
assigned pointer now.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bd4370baccca..74118e67f649 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -267,7 +267,7 @@ struct request_queue {
 	int			poll_nsec;
 
 	struct blk_stat_callback	*poll_cb;
-	struct blk_rq_stat	poll_stat[BLK_MQ_POLL_STATS_BKTS];
+	struct blk_rq_stat	*poll_stat;
 
 	struct timer_list	timeout;
 	struct work_struct	timeout_work;
@@ -397,7 +397,6 @@ struct request_queue {
 #define QUEUE_FLAG_FUA		18	/* device supports FUA writes */
 #define QUEUE_FLAG_DAX		19	/* device supports DAX */
 #define QUEUE_FLAG_STATS	20	/* track IO start and completion times */
-#define QUEUE_FLAG_POLL_STATS	21	/* collecting stats for hybrid polling */
 #define QUEUE_FLAG_REGISTERED	22	/* queue has been registered to a disk */
 #define QUEUE_FLAG_QUIESCED	24	/* queue has been quiesced */
 #define QUEUE_FLAG_PCI_P2PDMA	25	/* device supports PCI p2p requests */
-- 
cgit v1.2.3


From 72cd9df2ef788d88c138d51223a01ca6281f232d Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 23 Nov 2021 17:37:33 -0800
Subject: blk-crypto: remove blk_crypto_unregister()

This function is trivial and is only used in one place.  Having this
function is misleading because it implies that blk_crypto_register()
needs to be paired with blk_crypto_unregister(), which is not the case.
Just set disk->queue->crypto_profile to NULL directly.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211124013733.347612-1-ebiggers@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 74118e67f649..0a4416ef4fbf 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1170,8 +1170,6 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned lo
 bool blk_crypto_register(struct blk_crypto_profile *profile,
 			 struct request_queue *q);
 
-void blk_crypto_unregister(struct request_queue *q);
-
 #else /* CONFIG_BLK_INLINE_ENCRYPTION */
 
 static inline bool blk_crypto_register(struct blk_crypto_profile *profile,
@@ -1180,8 +1178,6 @@ static inline bool blk_crypto_register(struct blk_crypto_profile *profile,
 	return true;
 }
 
-static inline void blk_crypto_unregister(struct request_queue *q) { }
-
 #endif /* CONFIG_BLK_INLINE_ENCRYPTION */
 
 enum blk_unique_id {
-- 
cgit v1.2.3


From e8dc17e2893b4107366004810ca2a4acf1fc8563 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 25 Oct 2021 09:06:57 +0200
Subject: blk-mq: Add blk_mq_complete_request_direct()

Add blk_mq_complete_request_direct() which completes the block request
directly instead deferring it to softirq for single queue devices.

This is useful for devices which complete the requests in preemptible
context and raising softirq from means scheduling ksoftirqd.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211025070658.1565848-2-bigeasy@linutronix.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 308edc2a4925..d952c3442261 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -752,6 +752,17 @@ static inline void blk_mq_set_request_complete(struct request *rq)
 	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
 }
 
+/*
+ * Complete the request directly instead of deferring it to softirq or
+ * completing it another CPU. Useful in preemptible instead of an interrupt.
+ */
+static inline void blk_mq_complete_request_direct(struct request *rq,
+		   void (*complete)(struct request *rq))
+{
+	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
+	complete(rq);
+}
+
 void blk_mq_start_request(struct request *rq);
 void blk_mq_end_request(struct request *rq, blk_status_t error);
 void __blk_mq_end_request(struct request *rq, blk_status_t error);
-- 
cgit v1.2.3


From 88c9a2ce520ba381bb70658c80ec704f4d60f728 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 26 Nov 2021 12:58:05 +0100
Subject: fork: move copy_io to block/blk-ioc.c

Move the copying of the I/O context to the block layer as that is where
we can use the proper low-level interfaces.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211126115817.2087431-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/iocontext.h | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 0a9dc40b7be8..bcd47d104d8e 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -129,14 +129,6 @@ static inline void get_io_context_active(struct io_context *ioc)
 	atomic_inc(&ioc->active_ref);
 }
 
-static inline void ioc_task_link(struct io_context *ioc)
-{
-	get_io_context_active(ioc);
-
-	WARN_ON_ONCE(atomic_read(&ioc->nr_tasks) <= 0);
-	atomic_inc(&ioc->nr_tasks);
-}
-
 struct task_struct;
 #ifdef CONFIG_BLOCK
 void put_io_context(struct io_context *ioc);
@@ -144,10 +136,21 @@ void put_io_context_active(struct io_context *ioc);
 void exit_io_context(struct task_struct *task);
 struct io_context *get_task_io_context(struct task_struct *task,
 				       gfp_t gfp_flags, int node);
+int __copy_io(unsigned long clone_flags, struct task_struct *tsk);
+static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk)
+{
+	if (!current->io_context)
+		return 0;
+	return __copy_io(clone_flags, tsk);
+}
 #else
 struct io_context;
 static inline void put_io_context(struct io_context *ioc) { }
 static inline void exit_io_context(struct task_struct *task) { }
-#endif
+static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk)
+{
+	return 0;
+}
+#endif /* CONFIG_BLOCK */
 
-#endif
+#endif /* IOCONTEXT_H */
-- 
cgit v1.2.3


From 3304742562d27fb87a6d8291cc48824dd20f6964 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 26 Nov 2021 12:58:09 +0100
Subject: block: mark put_io_context_active static

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211126115817.2087431-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/iocontext.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index bcd47d104d8e..3ba45953d522 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -132,7 +132,6 @@ static inline void get_io_context_active(struct io_context *ioc)
 struct task_struct;
 #ifdef CONFIG_BLOCK
 void put_io_context(struct io_context *ioc);
-void put_io_context_active(struct io_context *ioc);
 void exit_io_context(struct task_struct *task);
 struct io_context *get_task_io_context(struct task_struct *task,
 				       gfp_t gfp_flags, int node);
-- 
cgit v1.2.3


From 50569c24be61eafb3efa06e2a3ccd447f75ae1b0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 26 Nov 2021 12:58:12 +0100
Subject: block: remove get_io_context_active

Fold it into it's only caller, and remove a lof of the debug checks
that are not needed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211126115817.2087431-10-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/iocontext.h | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 3ba45953d522..c1229fbd6691 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -113,22 +113,6 @@ struct io_context {
 	struct work_struct release_work;
 };
 
-/**
- * get_io_context_active - get active reference on ioc
- * @ioc: ioc of interest
- *
- * Only iocs with active reference can issue new IOs.  This function
- * acquires an active reference on @ioc.  The caller must already have an
- * active reference on @ioc.
- */
-static inline void get_io_context_active(struct io_context *ioc)
-{
-	WARN_ON_ONCE(atomic_long_read(&ioc->refcount) <= 0);
-	WARN_ON_ONCE(atomic_read(&ioc->active_ref) <= 0);
-	atomic_long_inc(&ioc->refcount);
-	atomic_inc(&ioc->active_ref);
-}
-
 struct task_struct;
 #ifdef CONFIG_BLOCK
 void put_io_context(struct io_context *ioc);
-- 
cgit v1.2.3


From f3fa33acca9f0058157214800f68b10d8e71ab7a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 26 Nov 2021 13:18:00 +0100
Subject: block: remove the ->rq_disk field in struct request

Just use the disk attached to the request_queue instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20211126121802.2090656-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h       | 4 ----
 include/scsi/scsi_cmnd.h     | 2 +-
 include/scsi/scsi_device.h   | 4 ++--
 include/trace/events/block.h | 8 ++++----
 4 files changed, 7 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d952c3442261..ede7bef8880a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -100,7 +100,6 @@ struct request {
 		struct request *rq_next;
 	};
 
-	struct gendisk *rq_disk;
 	struct block_device *part;
 #ifdef CONFIG_BLK_RQ_ALLOC_TIME
 	/* Time that the first bio started allocating this request. */
@@ -890,9 +889,6 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
 	rq->__data_len = bio->bi_iter.bi_size;
 	rq->bio = rq->biotail = bio;
 	rq->ioprio = bio_prio(bio);
-
-	if (bio->bi_bdev)
-		rq->rq_disk = bio->bi_bdev->bd_disk;
 }
 
 void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx,
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 477a800a9543..6794d7322cbd 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -164,7 +164,7 @@ static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd)
 {
 	struct request *rq = scsi_cmd_to_rq(cmd);
 
-	return *(struct scsi_driver **)rq->rq_disk->private_data;
+	return *(struct scsi_driver **)rq->q->disk->private_data;
 }
 
 void scsi_done(struct scsi_cmnd *cmd);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index d1c6fc83b1e3..ab7557d84f75 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -275,9 +275,9 @@ scmd_printk(const char *, const struct scsi_cmnd *, const char *, ...);
 	do {								\
 		struct request *__rq = scsi_cmd_to_rq((scmd));		\
 									\
-		if (__rq->rq_disk)					\
+		if (__rq->q->disk)					\
 			sdev_dbg((scmd)->device, "[%s] " fmt,		\
-				 __rq->rq_disk->disk_name, ##a);	\
+				 __rq->q->disk->disk_name, ##a);	\
 		else							\
 			sdev_dbg((scmd)->device, fmt, ##a);		\
 	} while (0)
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index a95daa4d4caa..27170e40e8c9 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -85,7 +85,7 @@ TRACE_EVENT(block_rq_requeue,
 	),
 
 	TP_fast_assign(
-		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->dev	   = rq->q->disk ? disk_devt(rq->q->disk) : 0;
 		__entry->sector    = blk_rq_trace_sector(rq);
 		__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
 
@@ -128,7 +128,7 @@ TRACE_EVENT(block_rq_complete,
 	),
 
 	TP_fast_assign(
-		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->dev	   = rq->q->disk ? disk_devt(rq->q->disk) : 0;
 		__entry->sector    = blk_rq_pos(rq);
 		__entry->nr_sector = nr_bytes >> 9;
 		__entry->error     = blk_status_to_errno(error);
@@ -161,7 +161,7 @@ DECLARE_EVENT_CLASS(block_rq,
 	),
 
 	TP_fast_assign(
-		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->dev	   = rq->q->disk ? disk_devt(rq->q->disk) : 0;
 		__entry->sector    = blk_rq_trace_sector(rq);
 		__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
 		__entry->bytes     = blk_rq_bytes(rq);
@@ -512,7 +512,7 @@ TRACE_EVENT(block_rq_remap,
 	),
 
 	TP_fast_assign(
-		__entry->dev		= disk_devt(rq->rq_disk);
+		__entry->dev		= disk_devt(rq->q->disk);
 		__entry->sector		= blk_rq_pos(rq);
 		__entry->nr_sector	= blk_rq_sectors(rq);
 		__entry->old_dev	= dev;
-- 
cgit v1.2.3


From b84ba30b6c7a75babdf73b83bc3c7b59b944501a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 26 Nov 2021 13:18:01 +0100
Subject: block: remove the gendisk argument to blk_execute_rq

Remove the gendisk aregument to blk_execute_rq and blk_execute_rq_nowait
given that it is unused now.  Also convert the boolean at_head parameter
to actually use the bool type while touching the prototype.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20211126121802.2090656-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ede7bef8880a..1b87b7c8bbff 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -924,10 +924,9 @@ int blk_rq_unmap_user(struct bio *);
 int blk_rq_map_kern(struct request_queue *, struct request *, void *,
 		unsigned int, gfp_t);
 int blk_rq_append_bio(struct request *rq, struct bio *bio);
-void blk_execute_rq_nowait(struct gendisk *, struct request *, int,
-		rq_end_io_fn *);
-blk_status_t blk_execute_rq(struct gendisk *bd_disk, struct request *rq,
-		int at_head);
+void blk_execute_rq_nowait(struct request *rq, bool at_head,
+		rq_end_io_fn *end_io);
+blk_status_t blk_execute_rq(struct request *rq, bool at_head);
 
 struct req_iterator {
 	struct bvec_iter iter;
-- 
cgit v1.2.3


From a30e3441325ba4011ddf125932cda21ca820c0bb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 26 Nov 2021 13:18:02 +0100
Subject: scsi: remove the gendisk argument to scsi_ioctl

Now that blk_execute_rq does not take a gendisk argument there is no need
to pass it through the scsi_ioctl callchain either.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20211126121802.2090656-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/scsi/scsi_ioctl.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/scsi/scsi_ioctl.h b/include/scsi/scsi_ioctl.h
index d2cb9aeaf1f1..beac64e38b87 100644
--- a/include/scsi/scsi_ioctl.h
+++ b/include/scsi/scsi_ioctl.h
@@ -45,8 +45,8 @@ typedef struct scsi_fctargaddress {
 
 int scsi_ioctl_block_when_processing_errors(struct scsi_device *sdev,
 		int cmd, bool ndelay);
-int scsi_ioctl(struct scsi_device *sdev, struct gendisk *disk, fmode_t mode,
-		int cmd, void __user *arg);
+int scsi_ioctl(struct scsi_device *sdev, fmode_t mode, int cmd,
+		void __user *arg);
 int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp);
 int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp);
 bool scsi_cmd_allowed(unsigned char *cmd, fmode_t mode);
-- 
cgit v1.2.3


From cdef485217d30382f3bf6448c54b4401648fe3f1 Mon Sep 17 00:00:00 2001
From: msizanoen1 <msizanoen@qtmlabs.xyz>
Date: Tue, 23 Nov 2021 13:48:32 +0100
Subject: ipv6: fix memory leak in fib6_rule_suppress

The kernel leaks memory when a `fib` rule is present in IPv6 nftables
firewall rules and a suppress_prefix rule is present in the IPv6 routing
rules (used by certain tools such as wg-quick). In such scenarios, every
incoming packet will leak an allocation in `ip6_dst_cache` slab cache.

After some hours of `bpftrace`-ing and source code reading, I tracked
down the issue to ca7a03c41753 ("ipv6: do not free rt if
FIB_LOOKUP_NOREF is set on suppress rule").

The problem with that change is that the generic `args->flags` always have
`FIB_LOOKUP_NOREF` set[1][2] but the IPv6-specific flag
`RT6_LOOKUP_F_DST_NOREF` might not be, leading to `fib6_rule_suppress` not
decreasing the refcount when needed.

How to reproduce:
 - Add the following nftables rule to a prerouting chain:
     meta nfproto ipv6 fib saddr . mark . iif oif missing drop
   This can be done with:
     sudo nft create table inet test
     sudo nft create chain inet test test_chain '{ type filter hook prerouting priority filter + 10; policy accept; }'
     sudo nft add rule inet test test_chain meta nfproto ipv6 fib saddr . mark . iif oif missing drop
 - Run:
     sudo ip -6 rule add table main suppress_prefixlength 0
 - Watch `sudo slabtop -o | grep ip6_dst_cache` to see memory usage increase
   with every incoming ipv6 packet.

This patch exposes the protocol-specific flags to the protocol
specific `suppress` function, and check the protocol-specific `flags`
argument for RT6_LOOKUP_F_DST_NOREF instead of the generic
FIB_LOOKUP_NOREF when decreasing the refcount, like this.

[1]: https://github.com/torvalds/linux/blob/ca7a03c4175366a92cee0ccc4fec0038c3266e26/net/ipv6/fib6_rules.c#L71
[2]: https://github.com/torvalds/linux/blob/ca7a03c4175366a92cee0ccc4fec0038c3266e26/net/ipv6/fib6_rules.c#L99

Link: https://bugzilla.kernel.org/show_bug.cgi?id=215105
Fixes: ca7a03c41753 ("ipv6: do not free rt if FIB_LOOKUP_NOREF is set on suppress rule")
Cc: stable@vger.kernel.org
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_rules.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 4b10676c69d1..bd07484ab9dd 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -69,7 +69,7 @@ struct fib_rules_ops {
 	int			(*action)(struct fib_rule *,
 					  struct flowi *, int,
 					  struct fib_lookup_arg *);
-	bool			(*suppress)(struct fib_rule *,
+	bool			(*suppress)(struct fib_rule *, int,
 					    struct fib_lookup_arg *);
 	int			(*match)(struct fib_rule *,
 					 struct flowi *, int);
@@ -218,7 +218,9 @@ INDIRECT_CALLABLE_DECLARE(int fib4_rule_action(struct fib_rule *rule,
 			    struct fib_lookup_arg *arg));
 
 INDIRECT_CALLABLE_DECLARE(bool fib6_rule_suppress(struct fib_rule *rule,
+						int flags,
 						struct fib_lookup_arg *arg));
 INDIRECT_CALLABLE_DECLARE(bool fib4_rule_suppress(struct fib_rule *rule,
+						int flags,
 						struct fib_lookup_arg *arg));
 #endif
-- 
cgit v1.2.3


From 20ae1d6aa159eb91a9bf09ff92ccaa94dbea92c2 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 29 Nov 2021 10:39:25 -0500
Subject: wireguard: device: reset peer src endpoint when netns exits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Each peer's endpoint contains a dst_cache entry that takes a reference
to another netdev. When the containing namespace exits, we take down the
socket and prevent future sockets from being created (by setting
creating_net to NULL), which removes that potential reference on the
netns. However, it doesn't release references to the netns that a netdev
cached in dst_cache might be taking, so the netns still might fail to
exit. Since the socket is gimped anyway, we can simply clear all the
dst_caches (by way of clearing the endpoint src), which will release all
references.

However, the current dst_cache_reset function only releases those
references lazily. But it turns out that all of our usages of
wg_socket_clear_peer_endpoint_src are called from contexts that are not
exactly high-speed or bottle-necked. For example, when there's
connection difficulty, or when userspace is reconfiguring the interface.
And in particular for this patch, when the netns is exiting. So for
those cases, it makes more sense to call dst_release immediately. For
that, we add a small helper function to dst_cache.

This patch also adds a test to netns.sh from Hangbin Liu to ensure this
doesn't regress.

Tested-by: Hangbin Liu <liuhangbin@gmail.com>
Reported-by: Xiumei Mu <xmu@redhat.com>
Cc: Toke Høiland-Jørgensen <toke@redhat.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Fixes: 900575aa33a3 ("wireguard: device: avoid circular netns references")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dst_cache.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h
index 67634675e919..df6622a5fe98 100644
--- a/include/net/dst_cache.h
+++ b/include/net/dst_cache.h
@@ -79,6 +79,17 @@ static inline void dst_cache_reset(struct dst_cache *dst_cache)
 	dst_cache->reset_ts = jiffies;
 }
 
+/**
+ *	dst_cache_reset_now - invalidate the cache contents immediately
+ *	@dst_cache: the cache
+ *
+ *	The caller must be sure there are no concurrent users, as this frees
+ *	all dst_cache users immediately, rather than waiting for the next
+ *	per-cpu usage like dst_cache_reset does. Most callers should use the
+ *	higher speed lazily-freed dst_cache_reset function instead.
+ */
+void dst_cache_reset_now(struct dst_cache *dst_cache);
+
 /**
  *	dst_cache_init - initialize the cache, allocating the required storage
  *	@dst_cache: the cache
-- 
cgit v1.2.3


From f7e5b9bfa6c8820407b64eabc1f29c9a87e8993d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 29 Nov 2021 10:39:29 -0500
Subject: siphash: use _unaligned version by default

On ARM v6 and later, we define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
because the ordinary load/store instructions (ldr, ldrh, ldrb) can
tolerate any misalignment of the memory address. However, load/store
double and load/store multiple instructions (ldrd, ldm) may still only
be used on memory addresses that are 32-bit aligned, and so we have to
use the CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS macro with care, or we
may end up with a severe performance hit due to alignment traps that
require fixups by the kernel. Testing shows that this currently happens
with clang-13 but not gcc-11. In theory, any compiler version can
produce this bug or other problems, as we are dealing with undefined
behavior in C99 even on architectures that support this in hardware,
see also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100363.

Fortunately, the get_unaligned() accessors do the right thing: when
building for ARMv6 or later, the compiler will emit unaligned accesses
using the ordinary load/store instructions (but avoid the ones that
require 32-bit alignment). When building for older ARM, those accessors
will emit the appropriate sequence of ldrb/mov/orr instructions. And on
architectures that can truly tolerate any kind of misalignment, the
get_unaligned() accessors resolve to the leXX_to_cpup accessors that
operate on aligned addresses.

Since the compiler will in fact emit ldrd or ldm instructions when
building this code for ARM v6 or later, the solution is to use the
unaligned accessors unconditionally on architectures where this is
known to be fast. The _aligned version of the hash function is
however still needed to get the best performance on architectures
that cannot do any unaligned access in hardware.

This new version avoids the undefined behavior and should produce
the fastest hash on all architectures we support.

Link: https://lore.kernel.org/linux-arm-kernel/20181008211554.5355-4-ard.biesheuvel@linaro.org/
Link: https://lore.kernel.org/linux-crypto/CAK8P3a2KfmmGDbVHULWevB0hv71P2oi2ZCHEAqT=8dQfa0=cqQ@mail.gmail.com/
Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Fixes: 2c956a60778c ("siphash: add cryptographically secure PRF")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/siphash.h | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/siphash.h b/include/linux/siphash.h
index bf21591a9e5e..0cda61855d90 100644
--- a/include/linux/siphash.h
+++ b/include/linux/siphash.h
@@ -27,9 +27,7 @@ static inline bool siphash_key_is_zero(const siphash_key_t *key)
 }
 
 u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key);
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key);
-#endif
 
 u64 siphash_1u64(const u64 a, const siphash_key_t *key);
 u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key);
@@ -82,10 +80,9 @@ static inline u64 ___siphash_aligned(const __le64 *data, size_t len,
 static inline u64 siphash(const void *data, size_t len,
 			  const siphash_key_t *key)
 {
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-	if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
+	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+	    !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
 		return __siphash_unaligned(data, len, key);
-#endif
 	return ___siphash_aligned(data, len, key);
 }
 
@@ -96,10 +93,8 @@ typedef struct {
 
 u32 __hsiphash_aligned(const void *data, size_t len,
 		       const hsiphash_key_t *key);
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u32 __hsiphash_unaligned(const void *data, size_t len,
 			 const hsiphash_key_t *key);
-#endif
 
 u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key);
 u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key);
@@ -135,10 +130,9 @@ static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len,
 static inline u32 hsiphash(const void *data, size_t len,
 			   const hsiphash_key_t *key)
 {
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-	if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
+	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+	    !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
 		return __hsiphash_unaligned(data, len, key);
-#endif
 	return ___hsiphash_aligned(data, len, key);
 }
 
-- 
cgit v1.2.3


From b93199b2867646be5b1c84cc0a844df023877806 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 29 Nov 2021 10:48:39 +0100
Subject: drm/ttm: Don't include drm_hashtab.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the include statement for drm_hashtab.h. It's not required
by TTM.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211129094841.22499-2-tzimmermann@suse.de
---
 include/drm/ttm/ttm_bo_api.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index cd785cfa3123..c17b2df9178b 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -32,7 +32,6 @@
 #define _TTM_BO_API_H_
 
 #include <drm/drm_gem.h>
-#include <drm/drm_hashtab.h>
 #include <drm/drm_vma_manager.h>
 #include <linux/kref.h>
 #include <linux/list.h>
-- 
cgit v1.2.3


From a21800bced7cbaf7bb8f5281db17a5d7ef6e197a Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 29 Nov 2021 10:48:41 +0100
Subject: drm: Declare hashtable as legacy

The DRM hashtable code is only used by internal functions for legacy
UMS drivers. Move the implementation behind CONFIG_DRM_LEGACY and the
declarations into legacy header files. Unexport the symbols.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211129094841.22499-4-tzimmermann@suse.de
---
 include/drm/drm_device.h  |  5 +--
 include/drm/drm_hashtab.h | 79 -----------------------------------------------
 include/drm/drm_legacy.h  | 15 ++++++++-
 3 files changed, 15 insertions(+), 84 deletions(-)
 delete mode 100644 include/drm/drm_hashtab.h

(limited to 'include')

diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
index 604b1d1b2d72..9923c7a6885e 100644
--- a/include/drm/drm_device.h
+++ b/include/drm/drm_device.h
@@ -6,16 +6,13 @@
 #include <linux/mutex.h>
 #include <linux/idr.h>
 
-#include <drm/drm_hashtab.h>
+#include <drm/drm_legacy.h>
 #include <drm/drm_mode_config.h>
 
 struct drm_driver;
 struct drm_minor;
 struct drm_master;
-struct drm_device_dma;
 struct drm_vblank_crtc;
-struct drm_sg_mem;
-struct drm_local_map;
 struct drm_vma_offset_manager;
 struct drm_vram_mm;
 struct drm_fb_helper;
diff --git a/include/drm/drm_hashtab.h b/include/drm/drm_hashtab.h
deleted file mode 100644
index bb95ff011baf..000000000000
--- a/include/drm/drm_hashtab.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2006 Tungsten Graphics, Inc., Bismack, ND. USA.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- *
- **************************************************************************/
-/*
- * Simple open hash tab implementation.
- *
- * Authors:
- * Thomas Hellström <thomas-at-tungstengraphics-dot-com>
- */
-
-#ifndef DRM_HASHTAB_H
-#define DRM_HASHTAB_H
-
-#include <linux/list.h>
-
-#define drm_hash_entry(_ptr, _type, _member) container_of(_ptr, _type, _member)
-
-struct drm_hash_item {
-	struct hlist_node head;
-	unsigned long key;
-};
-
-struct drm_open_hash {
-	struct hlist_head *table;
-	u8 order;
-};
-
-int drm_ht_create(struct drm_open_hash *ht, unsigned int order);
-int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item);
-int drm_ht_just_insert_please(struct drm_open_hash *ht, struct drm_hash_item *item,
-			      unsigned long seed, int bits, int shift,
-			      unsigned long add);
-int drm_ht_find_item(struct drm_open_hash *ht, unsigned long key, struct drm_hash_item **item);
-
-void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key);
-int drm_ht_remove_key(struct drm_open_hash *ht, unsigned long key);
-int drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item);
-void drm_ht_remove(struct drm_open_hash *ht);
-
-/*
- * RCU-safe interface
- *
- * The user of this API needs to make sure that two or more instances of the
- * hash table manipulation functions are never run simultaneously.
- * The lookup function drm_ht_find_item_rcu may, however, run simultaneously
- * with any of the manipulation functions as long as it's called from within
- * an RCU read-locked section.
- */
-#define drm_ht_insert_item_rcu drm_ht_insert_item
-#define drm_ht_just_insert_please_rcu drm_ht_just_insert_please
-#define drm_ht_remove_key_rcu drm_ht_remove_key
-#define drm_ht_remove_item_rcu drm_ht_remove_item
-#define drm_ht_find_item_rcu drm_ht_find_item
-
-#endif
diff --git a/include/drm/drm_legacy.h b/include/drm/drm_legacy.h
index 58dc8d8cc907..0fc85418aad8 100644
--- a/include/drm/drm_legacy.h
+++ b/include/drm/drm_legacy.h
@@ -37,7 +37,6 @@
 
 #include <drm/drm.h>
 #include <drm/drm_auth.h>
-#include <drm/drm_hashtab.h>
 
 struct drm_device;
 struct drm_driver;
@@ -51,6 +50,20 @@ struct pci_driver;
  * you're doing it terribly wrong.
  */
 
+/*
+ * Hash-table Support
+ */
+
+struct drm_hash_item {
+	struct hlist_node head;
+	unsigned long key;
+};
+
+struct drm_open_hash {
+	struct hlist_head *table;
+	u8 order;
+};
+
 /**
  * DMA buffer.
  */
-- 
cgit v1.2.3


From d0c4e34db0b0a012352dad499a13738b4102f277 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 15 Nov 2021 13:01:46 +0100
Subject: drm/cma-helper: Move driver and file ops to the end of header

Restructure the header file for CMA helpers by moving declarations
for driver and file operations to the end of the file. No functional
changes.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Maxime Ripard <maxime@cerno.tech>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211115120148.21766-2-tzimmermann@suse.de
---
 include/drm/drm_gem_cma_helper.h | 114 ++++++++++++++++++++-------------------
 1 file changed, 60 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h
index cd13508acbc1..e0fb7a0cf03f 100644
--- a/include/drm/drm_gem_cma_helper.h
+++ b/include/drm/drm_gem_cma_helper.h
@@ -32,77 +32,40 @@ struct drm_gem_cma_object {
 #define to_drm_gem_cma_obj(gem_obj) \
 	container_of(gem_obj, struct drm_gem_cma_object, base)
 
-#ifndef CONFIG_MMU
-#define DRM_GEM_CMA_UNMAPPED_AREA_FOPS \
-	.get_unmapped_area	= drm_gem_cma_get_unmapped_area,
-#else
-#define DRM_GEM_CMA_UNMAPPED_AREA_FOPS
-#endif
-
-/**
- * DEFINE_DRM_GEM_CMA_FOPS() - macro to generate file operations for CMA drivers
- * @name: name for the generated structure
- *
- * This macro autogenerates a suitable &struct file_operations for CMA based
- * drivers, which can be assigned to &drm_driver.fops. Note that this structure
- * cannot be shared between drivers, because it contains a reference to the
- * current module using THIS_MODULE.
- *
- * Note that the declaration is already marked as static - if you need a
- * non-static version of this you're probably doing it wrong and will break the
- * THIS_MODULE reference by accident.
- */
-#define DEFINE_DRM_GEM_CMA_FOPS(name) \
-	static const struct file_operations name = {\
-		.owner		= THIS_MODULE,\
-		.open		= drm_open,\
-		.release	= drm_release,\
-		.unlocked_ioctl	= drm_ioctl,\
-		.compat_ioctl	= drm_compat_ioctl,\
-		.poll		= drm_poll,\
-		.read		= drm_read,\
-		.llseek		= noop_llseek,\
-		.mmap		= drm_gem_mmap,\
-		DRM_GEM_CMA_UNMAPPED_AREA_FOPS \
-	}
-
 /* free GEM object */
 void drm_gem_cma_free_object(struct drm_gem_object *gem_obj);
 
-/* create memory region for DRM framebuffer */
-int drm_gem_cma_dumb_create_internal(struct drm_file *file_priv,
-				     struct drm_device *drm,
-				     struct drm_mode_create_dumb *args);
-
-/* create memory region for DRM framebuffer */
-int drm_gem_cma_dumb_create(struct drm_file *file_priv,
-			    struct drm_device *drm,
-			    struct drm_mode_create_dumb *args);
-
 /* allocate physical memory */
 struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm,
 					      size_t size);
 
 extern const struct vm_operations_struct drm_gem_cma_vm_ops;
 
-#ifndef CONFIG_MMU
-unsigned long drm_gem_cma_get_unmapped_area(struct file *filp,
-					    unsigned long addr,
-					    unsigned long len,
-					    unsigned long pgoff,
-					    unsigned long flags);
-#endif
-
 void drm_gem_cma_print_info(struct drm_printer *p, unsigned int indent,
 			    const struct drm_gem_object *obj);
 
 struct sg_table *drm_gem_cma_get_sg_table(struct drm_gem_object *obj);
+int drm_gem_cma_vmap(struct drm_gem_object *obj, struct dma_buf_map *map);
+int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+
+/*
+ * Driver ops
+ */
+
+/* create memory region for DRM framebuffer */
+int drm_gem_cma_dumb_create_internal(struct drm_file *file_priv,
+				     struct drm_device *drm,
+				     struct drm_mode_create_dumb *args);
+
+/* create memory region for DRM framebuffer */
+int drm_gem_cma_dumb_create(struct drm_file *file_priv,
+			    struct drm_device *drm,
+			    struct drm_mode_create_dumb *args);
+
 struct drm_gem_object *
 drm_gem_cma_prime_import_sg_table(struct drm_device *dev,
 				  struct dma_buf_attachment *attach,
 				  struct sg_table *sgt);
-int drm_gem_cma_vmap(struct drm_gem_object *obj, struct dma_buf_map *map);
-int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
 
 /**
  * DRM_GEM_CMA_DRIVER_OPS_WITH_DUMB_CREATE - CMA GEM driver operations
@@ -185,4 +148,47 @@ drm_gem_cma_prime_import_sg_table_vmap(struct drm_device *drm,
 				       struct dma_buf_attachment *attach,
 				       struct sg_table *sgt);
 
+/*
+ * File ops
+ */
+
+#ifndef CONFIG_MMU
+unsigned long drm_gem_cma_get_unmapped_area(struct file *filp,
+					    unsigned long addr,
+					    unsigned long len,
+					    unsigned long pgoff,
+					    unsigned long flags);
+#define DRM_GEM_CMA_UNMAPPED_AREA_FOPS \
+	.get_unmapped_area	= drm_gem_cma_get_unmapped_area,
+#else
+#define DRM_GEM_CMA_UNMAPPED_AREA_FOPS
+#endif
+
+/**
+ * DEFINE_DRM_GEM_CMA_FOPS() - macro to generate file operations for CMA drivers
+ * @name: name for the generated structure
+ *
+ * This macro autogenerates a suitable &struct file_operations for CMA based
+ * drivers, which can be assigned to &drm_driver.fops. Note that this structure
+ * cannot be shared between drivers, because it contains a reference to the
+ * current module using THIS_MODULE.
+ *
+ * Note that the declaration is already marked as static - if you need a
+ * non-static version of this you're probably doing it wrong and will break the
+ * THIS_MODULE reference by accident.
+ */
+#define DEFINE_DRM_GEM_CMA_FOPS(name) \
+	static const struct file_operations name = {\
+		.owner		= THIS_MODULE,\
+		.open		= drm_open,\
+		.release	= drm_release,\
+		.unlocked_ioctl	= drm_ioctl,\
+		.compat_ioctl	= drm_compat_ioctl,\
+		.poll		= drm_poll,\
+		.read		= drm_read,\
+		.llseek		= noop_llseek,\
+		.mmap		= drm_gem_mmap,\
+		DRM_GEM_CMA_UNMAPPED_AREA_FOPS \
+	}
+
 #endif /* __DRM_GEM_CMA_HELPER_H__ */
-- 
cgit v1.2.3


From 05b1de51df077a2089e3d8ceec68aa687cff15db Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 15 Nov 2021 13:01:47 +0100
Subject: drm/cma-helper: Export dedicated wrappers for GEM object functions

Wrap GEM CMA functions for struct drm_gem_object_funcs and update
all callers. This will allow for an update of the public interfaces
of the GEM CMA helper library.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Maxime Ripard <maxime@cerno.tech>
Link: https://patchwork.freedesktop.org/patch/msgid/20211115120148.21766-3-tzimmermann@suse.de
---
 include/drm/drm_gem_cma_helper.h | 78 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h
index e0fb7a0cf03f..56d2f9fdf9ac 100644
--- a/include/drm/drm_gem_cma_helper.h
+++ b/include/drm/drm_gem_cma_helper.h
@@ -48,6 +48,84 @@ struct sg_table *drm_gem_cma_get_sg_table(struct drm_gem_object *obj);
 int drm_gem_cma_vmap(struct drm_gem_object *obj, struct dma_buf_map *map);
 int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
 
+/*
+ * GEM object functions
+ */
+
+/**
+ * drm_gem_cma_object_free - GEM object function for drm_gem_cma_free_object()
+ * @obj: GEM object to free
+ *
+ * This function wraps drm_gem_cma_free_object(). Drivers that employ the CMA helpers
+ * should use it as their &drm_gem_object_funcs.free handler.
+ */
+static inline void drm_gem_cma_object_free(struct drm_gem_object *obj)
+{
+	drm_gem_cma_free_object(obj);
+}
+
+/**
+ * drm_gem_cma_object_print_info() - Print &drm_gem_cma_object info for debugfs
+ * @p: DRM printer
+ * @indent: Tab indentation level
+ * @obj: GEM object
+ *
+ * This function wraps drm_gem_cma_print_info(). Drivers that employ the CMA helpers
+ * should use this function as their &drm_gem_object_funcs.print_info handler.
+ */
+static inline void drm_gem_cma_object_print_info(struct drm_printer *p, unsigned int indent,
+						 const struct drm_gem_object *obj)
+{
+	drm_gem_cma_print_info(p, indent, obj);
+}
+
+/**
+ * drm_gem_cma_object_get_sg_table - GEM object function for drm_gem_cma_get_sg_table()
+ * @obj: GEM object
+ *
+ * This function wraps drm_gem_cma_get_sg_table(). Drivers that employ the CMA helpers should
+ * use it as their &drm_gem_object_funcs.get_sg_table handler.
+ *
+ * Returns:
+ * A pointer to the scatter/gather table of pinned pages or NULL on failure.
+ */
+static inline struct sg_table *drm_gem_cma_object_get_sg_table(struct drm_gem_object *obj)
+{
+	return drm_gem_cma_get_sg_table(obj);
+}
+
+/*
+ * drm_gem_cma_object_vmap - GEM object function for drm_gem_cma_vmap()
+ * @obj: GEM object
+ * @map: Returns the kernel virtual address of the CMA GEM object's backing store.
+ *
+ * This function wraps drm_gem_cma_vmap(). Drivers that employ the CMA helpers should
+ * use it as their &drm_gem_object_funcs.vmap handler.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
+static inline int drm_gem_cma_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map)
+{
+	return drm_gem_cma_vmap(obj, map);
+}
+
+/**
+ * drm_gem_cma_object_mmap - GEM object function for drm_gem_cma_mmap()
+ * @obj: GEM object
+ * @vma: VMA for the area to be mapped
+ *
+ * This function wraps drm_gem_cma_mmap(). Drivers that employ the cma helpers should
+ * use it as their &drm_gem_object_funcs.mmap handler.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
+static inline int drm_gem_cma_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+	return drm_gem_cma_mmap(obj, vma);
+}
+
 /*
  * Driver ops
  */
-- 
cgit v1.2.3


From e580ea25c08d9e89593bcf80640e29108f0542cb Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 15 Nov 2021 13:01:48 +0100
Subject: drm/cma-helper: Pass GEM CMA object in public interfaces

Change all GEM CMA object functions that receive a GEM object
of type struct drm_gem_object to expect an object of type
struct drm_gem_cma_object instead.

This change reduces the number of upcasts from struct drm_gem_object
by moving them into callers. The C compiler can now verify that the
GEM CMA functions are called with the correct type.

For consistency, the patch also renames drm_gem_cma_free_object to
drm_gem_cma_free. It further updates documentation for a number of
functions.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Maxime Ripard <maxime@cerno.tech>
Link: https://patchwork.freedesktop.org/patch/msgid/20211115120148.21766-4-tzimmermann@suse.de
---
 include/drm/drm_gem_cma_helper.h | 39 ++++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h
index 56d2f9fdf9ac..adb507a9dbf0 100644
--- a/include/drm/drm_gem_cma_helper.h
+++ b/include/drm/drm_gem_cma_helper.h
@@ -32,28 +32,23 @@ struct drm_gem_cma_object {
 #define to_drm_gem_cma_obj(gem_obj) \
 	container_of(gem_obj, struct drm_gem_cma_object, base)
 
-/* free GEM object */
-void drm_gem_cma_free_object(struct drm_gem_object *gem_obj);
-
-/* allocate physical memory */
 struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm,
 					      size_t size);
+void drm_gem_cma_free(struct drm_gem_cma_object *cma_obj);
+void drm_gem_cma_print_info(const struct drm_gem_cma_object *cma_obj,
+			    struct drm_printer *p, unsigned int indent);
+struct sg_table *drm_gem_cma_get_sg_table(struct drm_gem_cma_object *cma_obj);
+int drm_gem_cma_vmap(struct drm_gem_cma_object *cma_obj, struct dma_buf_map *map);
+int drm_gem_cma_mmap(struct drm_gem_cma_object *cma_obj, struct vm_area_struct *vma);
 
 extern const struct vm_operations_struct drm_gem_cma_vm_ops;
 
-void drm_gem_cma_print_info(struct drm_printer *p, unsigned int indent,
-			    const struct drm_gem_object *obj);
-
-struct sg_table *drm_gem_cma_get_sg_table(struct drm_gem_object *obj);
-int drm_gem_cma_vmap(struct drm_gem_object *obj, struct dma_buf_map *map);
-int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
-
 /*
  * GEM object functions
  */
 
 /**
- * drm_gem_cma_object_free - GEM object function for drm_gem_cma_free_object()
+ * drm_gem_cma_object_free - GEM object function for drm_gem_cma_free()
  * @obj: GEM object to free
  *
  * This function wraps drm_gem_cma_free_object(). Drivers that employ the CMA helpers
@@ -61,7 +56,9 @@ int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
  */
 static inline void drm_gem_cma_object_free(struct drm_gem_object *obj)
 {
-	drm_gem_cma_free_object(obj);
+	struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj);
+
+	drm_gem_cma_free(cma_obj);
 }
 
 /**
@@ -76,7 +73,9 @@ static inline void drm_gem_cma_object_free(struct drm_gem_object *obj)
 static inline void drm_gem_cma_object_print_info(struct drm_printer *p, unsigned int indent,
 						 const struct drm_gem_object *obj)
 {
-	drm_gem_cma_print_info(p, indent, obj);
+	const struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj);
+
+	drm_gem_cma_print_info(cma_obj, p, indent);
 }
 
 /**
@@ -91,7 +90,9 @@ static inline void drm_gem_cma_object_print_info(struct drm_printer *p, unsigned
  */
 static inline struct sg_table *drm_gem_cma_object_get_sg_table(struct drm_gem_object *obj)
 {
-	return drm_gem_cma_get_sg_table(obj);
+	struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj);
+
+	return drm_gem_cma_get_sg_table(cma_obj);
 }
 
 /*
@@ -107,7 +108,9 @@ static inline struct sg_table *drm_gem_cma_object_get_sg_table(struct drm_gem_ob
  */
 static inline int drm_gem_cma_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map)
 {
-	return drm_gem_cma_vmap(obj, map);
+	struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj);
+
+	return drm_gem_cma_vmap(cma_obj, map);
 }
 
 /**
@@ -123,7 +126,9 @@ static inline int drm_gem_cma_object_vmap(struct drm_gem_object *obj, struct dma
  */
 static inline int drm_gem_cma_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
 {
-	return drm_gem_cma_mmap(obj, vma);
+	struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj);
+
+	return drm_gem_cma_mmap(cma_obj, vma);
 }
 
 /*
-- 
cgit v1.2.3


From 4047b9db1aa7512a10ba3560a3f63821c8c40235 Mon Sep 17 00:00:00 2001
From: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Date: Mon, 29 Nov 2021 01:28:54 +0530
Subject: net: stmmac: Add platform level debug register dump feature

dwmac-qcom-ethqos currently exposes a mechanism to dump rgmii registers
after the 'stmmac_dvr_probe()' returns. However with commit
5ec55823438e ("net: stmmac: add clocks management for gmac driver"),
we now let 'pm_runtime_put()' disable the clocks before returning from
'stmmac_dvr_probe()'.

This causes a crash when 'rgmii_dump()' register dumps are enabled,
as the clocks are already off.

Since other dwmac drivers (possible future users as well) might
require a similar register dump feature, introduce a platform level
callback to allow the same.

This fixes the crash noticed while enabling rgmii_dump() dumps in
dwmac-qcom-ethqos driver as well. It also allows future changes
to keep a invoking the register dump callback from the correct
place inside 'stmmac_dvr_probe()'.

Fixes: 5ec55823438e ("net: stmmac: add clocks management for gmac driver")
Cc: Joakim Zhang <qiangqing.zhang@nxp.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/stmmac.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 89b8e208cd7b..24eea1b05ca2 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -233,6 +233,7 @@ struct plat_stmmacenet_data {
 	int (*clks_config)(void *priv, bool enabled);
 	int (*crosststamp)(ktime_t *device, struct system_counterval_t *system,
 			   void *ctx);
+	void (*dump_debug_regs)(void *priv);
 	void *bsp_priv;
 	struct clk *stmmac_clk;
 	struct clk *pclk;
-- 
cgit v1.2.3


From 5944b5abd8646e8c6ac6af2b55f87dede1dae898 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 30 Nov 2021 12:29:47 +0800
Subject: Bonding: add arp_missed_max option

Currently, we use hard code number to verify if we are in the
arp_interval timeslice. But some user may want to reduce/extend
the verify timeslice. With the similar team option 'missed_max'
the uers could change that number based on their own environment.

Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/bond_options.h   | 1 +
 include/net/bonding.h        | 1 +
 include/uapi/linux/if_link.h | 1 +
 3 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/bond_options.h b/include/net/bond_options.h
index e64833a674eb..dd75c071f67e 100644
--- a/include/net/bond_options.h
+++ b/include/net/bond_options.h
@@ -65,6 +65,7 @@ enum {
 	BOND_OPT_NUM_PEER_NOTIF_ALIAS,
 	BOND_OPT_PEER_NOTIF_DELAY,
 	BOND_OPT_LACP_ACTIVE,
+	BOND_OPT_MISSED_MAX,
 	BOND_OPT_LAST
 };
 
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 15e083e18f75..f6ae3a4baea4 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -121,6 +121,7 @@ struct bond_params {
 	int xmit_policy;
 	int miimon;
 	u8 num_peer_notif;
+	u8 missed_max;
 	int arp_interval;
 	int arp_validate;
 	int arp_all_targets;
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index eebd3894fe89..4ac53b30b6dc 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -858,6 +858,7 @@ enum {
 	IFLA_BOND_TLB_DYNAMIC_LB,
 	IFLA_BOND_PEER_NOTIF_DELAY,
 	IFLA_BOND_AD_LACP_ACTIVE,
+	IFLA_BOND_MISSED_MAX,
 	__IFLA_BOND_MAX,
 };
 
-- 
cgit v1.2.3


From 4c897cfc46a554a523343fc3296333c473a2fc52 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Tue, 30 Nov 2021 12:16:20 +0200
Subject: devlink: Simplify devlink resources unregister call

The devlink_resources_unregister() used second parameter as an
entry point for the recursive removal of devlink resources. None
of the callers outside of devlink core needed to use this field,
so let's remove it.

As part of this removal, the "struct devlink_resource" was moved
from .h to .c file as it is not possible to use in any place in
the code except devlink.c.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 30 +-----------------------------
 1 file changed, 1 insertion(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index e3c88fabd700..043fcec8b0aa 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -361,33 +361,6 @@ devlink_resource_size_params_init(struct devlink_resource_size_params *size_para
 
 typedef u64 devlink_resource_occ_get_t(void *priv);
 
-/**
- * struct devlink_resource - devlink resource
- * @name: name of the resource
- * @id: id, per devlink instance
- * @size: size of the resource
- * @size_new: updated size of the resource, reload is needed
- * @size_valid: valid in case the total size of the resource is valid
- *              including its children
- * @parent: parent resource
- * @size_params: size parameters
- * @list: parent list
- * @resource_list: list of child resources
- */
-struct devlink_resource {
-	const char *name;
-	u64 id;
-	u64 size;
-	u64 size_new;
-	bool size_valid;
-	struct devlink_resource *parent;
-	struct devlink_resource_size_params size_params;
-	struct list_head list;
-	struct list_head resource_list;
-	devlink_resource_occ_get_t *occ_get;
-	void *occ_get_priv;
-};
-
 #define DEVLINK_RESOURCE_ID_PARENT_TOP 0
 
 #define DEVLINK_RESOURCE_GENERIC_NAME_PORTS "physical_ports"
@@ -1571,8 +1544,7 @@ int devlink_resource_register(struct devlink *devlink,
 			      u64 resource_id,
 			      u64 parent_resource_id,
 			      const struct devlink_resource_size_params *size_params);
-void devlink_resources_unregister(struct devlink *devlink,
-				  struct devlink_resource *resource);
+void devlink_resources_unregister(struct devlink *devlink);
 int devlink_resource_size_get(struct devlink *devlink,
 			      u64 resource_id,
 			      u64 *p_resource_size);
-- 
cgit v1.2.3


From 79364031c5b4365ca28ac0fa00acfab5bf465be1 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sat, 27 Nov 2021 17:32:00 +0100
Subject: bpf: Make sure bpf_disable_instrumentation() is safe vs preemption.

The initial implementation of migrate_disable() for mainline was a
wrapper around preempt_disable(). RT kernels substituted this with a
real migrate disable implementation.

Later on mainline gained true migrate disable support, but neither
documentation nor affected code were updated.

Remove stale comments claiming that migrate_disable() is PREEMPT_RT only.

Don't use __this_cpu_inc() in the !PREEMPT_RT path because preemption is
not disabled and the RMW operation can be preempted.

Fixes: 74d862b682f51 ("sched: Make migrate_disable/enable() independent of RT")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20211127163200.10466-3-bigeasy@linutronix.de
---
 include/linux/bpf.h    | 16 ++--------------
 include/linux/filter.h |  3 ---
 2 files changed, 2 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 84ff6ef49462..755f38e893be 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1353,28 +1353,16 @@ extern struct mutex bpf_stats_enabled_mutex;
  * kprobes, tracepoints) to prevent deadlocks on map operations as any of
  * these events can happen inside a region which holds a map bucket lock
  * and can deadlock on it.
- *
- * Use the preemption safe inc/dec variants on RT because migrate disable
- * is preemptible on RT and preemption in the middle of the RMW operation
- * might lead to inconsistent state. Use the raw variants for non RT
- * kernels as migrate_disable() maps to preempt_disable() so the slightly
- * more expensive save operation can be avoided.
  */
 static inline void bpf_disable_instrumentation(void)
 {
 	migrate_disable();
-	if (IS_ENABLED(CONFIG_PREEMPT_RT))
-		this_cpu_inc(bpf_prog_active);
-	else
-		__this_cpu_inc(bpf_prog_active);
+	this_cpu_inc(bpf_prog_active);
 }
 
 static inline void bpf_enable_instrumentation(void)
 {
-	if (IS_ENABLED(CONFIG_PREEMPT_RT))
-		this_cpu_dec(bpf_prog_active);
-	else
-		__this_cpu_dec(bpf_prog_active);
+	this_cpu_dec(bpf_prog_active);
 	migrate_enable();
 }
 
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 24b7ed2677af..534f678ca50f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -640,9 +640,6 @@ static __always_inline u32 bpf_prog_run(const struct bpf_prog *prog, const void
  * This uses migrate_disable/enable() explicitly to document that the
  * invocation of a BPF program does not require reentrancy protection
  * against a BPF program which is invoked from a preempting task.
- *
- * For non RT enabled kernels migrate_disable/enable() maps to
- * preempt_disable/enable(), i.e. it disables also preemption.
  */
 static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog,
 					  const void *ctx)
-- 
cgit v1.2.3


From 15fa179f3f45415696d376abc84e0098a9586b33 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?=
 <amadeuszx.slawinski@linux.intel.com>
Date: Fri, 26 Nov 2021 15:03:53 +0100
Subject: ALSA: hda: Fill gaps in NHLT endpoint-interface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two key operations missings are: endpoint presence-check and retrieval
of matching endpoint hardware configuration (blob). Add operations for
both use cases.

Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
Signed-off-by: Cezary Rojewski <cezary.rojewski@intel.com>
Link: https://lore.kernel.org/r/20211126140355.1042684-2-cezary.rojewski@intel.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/intel-nhlt.h | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/sound/intel-nhlt.h b/include/sound/intel-nhlt.h
index d0574805865f..089a760d36eb 100644
--- a/include/sound/intel-nhlt.h
+++ b/include/sound/intel-nhlt.h
@@ -10,6 +10,14 @@
 
 #include <linux/acpi.h>
 
+enum nhlt_link_type {
+	NHLT_LINK_HDA = 0,
+	NHLT_LINK_DSP = 1,
+	NHLT_LINK_DMIC = 2,
+	NHLT_LINK_SSP = 3,
+	NHLT_LINK_INVALID
+};
+
 #if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_INTEL_NHLT)
 
 struct wav_fmt {
@@ -33,14 +41,6 @@ struct wav_fmt_ext {
 	u8 sub_fmt[16];
 } __packed;
 
-enum nhlt_link_type {
-	NHLT_LINK_HDA = 0,
-	NHLT_LINK_DSP = 1,
-	NHLT_LINK_DMIC = 2,
-	NHLT_LINK_SSP = 3,
-	NHLT_LINK_INVALID
-};
-
 enum nhlt_device_type {
 	NHLT_DEVICE_BT = 0,
 	NHLT_DEVICE_DMIC = 1,
@@ -132,6 +132,12 @@ void intel_nhlt_free(struct nhlt_acpi_table *addr);
 
 int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt);
 
+bool intel_nhlt_has_endpoint_type(struct nhlt_acpi_table *nhlt, u8 link_type);
+struct nhlt_specific_cfg *
+intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt,
+			     u32 bus_id, u8 link_type, u8 vbps, u8 bps,
+			     u8 num_ch, u32 rate, u8 dir, u8 dev_type);
+
 #else
 
 struct nhlt_acpi_table;
@@ -150,6 +156,21 @@ static inline int intel_nhlt_get_dmic_geo(struct device *dev,
 {
 	return 0;
 }
+
+static inline bool intel_nhlt_has_endpoint_type(struct nhlt_acpi_table *nhlt,
+						u8 link_type)
+{
+	return false;
+}
+
+static inline struct nhlt_specific_cfg *
+intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt,
+			     u32 bus_id, u8 link_type, u8 vbps, u8 bps,
+			     u8 num_ch, u32 rate, u8 dir, u8 dev_type)
+{
+	return NULL;
+}
+
 #endif
 
 #endif
-- 
cgit v1.2.3


From e6f2dd0f80674e9d5960337b3e9c2a242441b326 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannekoong@fb.com>
Date: Mon, 29 Nov 2021 19:06:19 -0800
Subject: bpf: Add bpf_loop helper

This patch adds the kernel-side and API changes for a new helper
function, bpf_loop:

long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
u64 flags);

where long (*callback_fn)(u32 index, void *ctx);

bpf_loop invokes the "callback_fn" **nr_loops** times or until the
callback_fn returns 1. The callback_fn can only return 0 or 1, and
this is enforced by the verifier. The callback_fn index is zero-indexed.

A few things to please note:
~ The "u64 flags" parameter is currently unused but is included in
case a future use case for it arises.
~ In the kernel-side implementation of bpf_loop (kernel/bpf/bpf_iter.c),
bpf_callback_t is used as the callback function cast.
~ A program can have nested bpf_loop calls but the program must
still adhere to the verifier constraint of its stack depth (the stack depth
cannot exceed MAX_BPF_STACK))
~ Recursive callback_fns do not pass the verifier, due to the call stack
for these being too deep.
~ The next patch will include the tests and benchmark

Signed-off-by: Joanne Koong <joannekoong@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211130030622.4131246-2-joannekoong@fb.com
---
 include/linux/bpf.h      |  1 +
 include/uapi/linux/bpf.h | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cc7a0c36e7df..cad0829710be 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2164,6 +2164,7 @@ extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
 extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
 extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
 extern const struct bpf_func_proto bpf_find_vma_proto;
+extern const struct bpf_func_proto bpf_loop_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
   enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a69e4b04ffeb..211b43afd0fb 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4957,6 +4957,30 @@ union bpf_attr {
  *		**-ENOENT** if *task->mm* is NULL, or no vma contains *addr*.
  *		**-EBUSY** if failed to try lock mmap_lock.
  *		**-EINVAL** for invalid **flags**.
+ *
+ * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags)
+ *	Description
+ *		For **nr_loops**, call **callback_fn** function
+ *		with **callback_ctx** as the context parameter.
+ *		The **callback_fn** should be a static function and
+ *		the **callback_ctx** should be a pointer to the stack.
+ *		The **flags** is used to control certain aspects of the helper.
+ *		Currently, the **flags** must be 0. Currently, nr_loops is
+ *		limited to 1 << 23 (~8 million) loops.
+ *
+ *		long (\*callback_fn)(u32 index, void \*ctx);
+ *
+ *		where **index** is the current index in the loop. The index
+ *		is zero-indexed.
+ *
+ *		If **callback_fn** returns 0, the helper will continue to the next
+ *		loop. If return value is 1, the helper will skip the rest of
+ *		the loops and return. Other return values are not used now,
+ *		and will be rejected by the verifier.
+ *
+ *	Return
+ *		The number of loops performed, **-EINVAL** for invalid **flags**,
+ *		**-E2BIG** if **nr_loops** exceeds the maximum number of loops.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5140,6 +5164,7 @@ union bpf_attr {
 	FN(skc_to_unix_sock),		\
 	FN(kallsyms_lookup_name),	\
 	FN(find_vma),			\
+	FN(loop),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 7ad639840acf2800b5f387c495795f995a67a329 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 29 Nov 2021 13:06:43 +0000
Subject: thread_info: Add helpers to snapshot thread flags

In <linux/thread_info.h> there are helpers to manipulate individual thread
flags, but where code wants to check several flags at once, it must open
code reading current_thread_info()->flags and operating on a snapshot.

As some flags can be set remotely it's necessary to use READ_ONCE() to get
a consistent snapshot even when IRQs are disabled, but some code forgets to
do this. Generally this is unlike to cause a problem in practice, but it is
somewhat unsound, and KCSAN will legitimately warn that there is a data
race.

To make it easier to do the right thing, and to highlight that concurrent
modification is possible, add new helpers to snapshot the flags, which
should be used in preference to plain reads. Subsequent patches will move
existing code to use the new helpers.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marco Elver <elver@google.com>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20211129130653.2037928-2-mark.rutland@arm.com
---
 include/linux/thread_info.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index ad0c4e041030..73a6f34b3847 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -118,6 +118,15 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
 	return test_bit(flag, (unsigned long *)&ti->flags);
 }
 
+/*
+ * This may be used in noinstr code, and needs to be __always_inline to prevent
+ * inadvertent instrumentation.
+ */
+static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti)
+{
+	return READ_ONCE(ti->flags);
+}
+
 #define set_thread_flag(flag) \
 	set_ti_thread_flag(current_thread_info(), flag)
 #define clear_thread_flag(flag) \
@@ -130,6 +139,11 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
 	test_and_clear_ti_thread_flag(current_thread_info(), flag)
 #define test_thread_flag(flag) \
 	test_ti_thread_flag(current_thread_info(), flag)
+#define read_thread_flags() \
+	read_ti_thread_flags(current_thread_info())
+
+#define read_task_thread_flags(t) \
+	read_ti_thread_flags(task_thread_info(t))
 
 #ifdef CONFIG_GENERIC_ENTRY
 #define set_syscall_work(fl) \
-- 
cgit v1.2.3


From 6ce895128b3bff738fe8d9dd74747a03e319e466 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 29 Nov 2021 13:06:44 +0000
Subject: entry: Snapshot thread flags

Some thread flags can be set remotely, and so even when IRQs are disabled,
the flags can change under our feet. Generally this is unlikely to cause a
problem in practice, but it is somewhat unsound, and KCSAN will
legitimately warn that there is a data race.

To avoid such issues, a snapshot of the flags has to be taken prior to
using them. Some places already use READ_ONCE() for that, others do not.

Convert them all to the new flag accessor helpers.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/r/20211129130653.2037928-3-mark.rutland@arm.com
---
 include/linux/entry-kvm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h
index 0d7865a0731c..07c878d6e323 100644
--- a/include/linux/entry-kvm.h
+++ b/include/linux/entry-kvm.h
@@ -75,7 +75,7 @@ static inline void xfer_to_guest_mode_prepare(void)
  */
 static inline bool __xfer_to_guest_mode_work_pending(void)
 {
-	unsigned long ti_work = READ_ONCE(current_thread_info()->flags);
+	unsigned long ti_work = read_thread_flags();
 
 	return !!(ti_work & XFER_TO_GUEST_MODE_WORK);
 }
-- 
cgit v1.2.3


From 4946f15e8c334840bf277a0bf924371eae120fcd Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Tue, 30 Nov 2021 22:40:43 +0100
Subject: genirq/generic_chip: Constify irq_generic_chip_ops

The only usage of irq_generic_chip_ops is to pass its address to
irq_domain_add_linear() which takes a pointer to const struct
irq_domain_ops. Make it const to allow the compiler to put it in
read-only memory.

[ tglx: Fixed subject prefix ]

Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20211130214043.1257585-1-rikard.falkeborn@gmail.com
---
 include/linux/irqdomain.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 553da4899f55..d476405802e9 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -131,7 +131,7 @@ struct irq_domain_ops {
 #endif
 };
 
-extern struct irq_domain_ops irq_generic_chip_ops;
+extern const struct irq_domain_ops irq_generic_chip_ops;
 
 struct irq_domain_chip_generic;
 
-- 
cgit v1.2.3


From 24ba53017e188e031f9cb8b290286fad52d2af00 Mon Sep 17 00:00:00 2001
From: Chun-Hung Tseng <henrybear327@gmail.com>
Date: Wed, 15 Sep 2021 17:02:18 +0800
Subject: rcu: Replace ________p1 and _________p1 with __UNIQUE_ID(rcu)

This commit replaces both ________p1 and _________p1 with __UNIQUE_ID(rcu),
and also adjusts the callers of the affected macros.

__UNIQUE_ID(rcu) will generate unique variable names during compilation,
which eliminates the need of ________p1 and _________p1 (both having 4
occurrences prior to the code change).  This also avoids the variable
name shadowing issue, or at least makes those wishing to cause shadowing
problems work much harder to do so.

The same idea is used for the min/max macros (commit 589a978 and commit
e9092d0).

Signed-off-by: Jim Huang <jserv@ccns.ncku.edu.tw>
Signed-off-by: Chun-Hung Tseng <henrybear327@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcupdate.h | 50 +++++++++++++++++++++++++++---------------------
 include/linux/srcu.h     |  3 ++-
 2 files changed, 30 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 5e0beb5c5659..88b42eb46406 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -364,6 +364,12 @@ static inline void rcu_preempt_sleep_check(void) { }
 #define rcu_check_sparse(p, space)
 #endif /* #else #ifdef __CHECKER__ */
 
+#define __unrcu_pointer(p, local)					\
+({									\
+	typeof(*p) *local = (typeof(*p) *__force)(p);			\
+	rcu_check_sparse(p, __rcu);					\
+	((typeof(*p) __force __kernel *)(local)); 			\
+})
 /**
  * unrcu_pointer - mark a pointer as not being RCU protected
  * @p: pointer needing to lose its __rcu property
@@ -371,39 +377,35 @@ static inline void rcu_preempt_sleep_check(void) { }
  * Converts @p from an __rcu pointer to a __kernel pointer.
  * This allows an __rcu pointer to be used with xchg() and friends.
  */
-#define unrcu_pointer(p)						\
-({									\
-	typeof(*p) *_________p1 = (typeof(*p) *__force)(p);		\
-	rcu_check_sparse(p, __rcu);					\
-	((typeof(*p) __force __kernel *)(_________p1)); 		\
-})
+#define unrcu_pointer(p) __unrcu_pointer(p, __UNIQUE_ID(rcu))
 
-#define __rcu_access_pointer(p, space) \
+#define __rcu_access_pointer(p, local, space) \
 ({ \
-	typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \
+	typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \
 	rcu_check_sparse(p, space); \
-	((typeof(*p) __force __kernel *)(_________p1)); \
+	((typeof(*p) __force __kernel *)(local)); \
 })
-#define __rcu_dereference_check(p, c, space) \
+#define __rcu_dereference_check(p, local, c, space) \
 ({ \
 	/* Dependency order vs. p above. */ \
-	typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \
+	typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \
 	RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \
 	rcu_check_sparse(p, space); \
-	((typeof(*p) __force __kernel *)(________p1)); \
+	((typeof(*p) __force __kernel *)(local)); \
 })
-#define __rcu_dereference_protected(p, c, space) \
+#define __rcu_dereference_protected(p, local, c, space) \
 ({ \
 	RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_protected() usage"); \
 	rcu_check_sparse(p, space); \
 	((typeof(*p) __force __kernel *)(p)); \
 })
-#define rcu_dereference_raw(p) \
+#define __rcu_dereference_raw(p, local) \
 ({ \
 	/* Dependency order vs. p above. */ \
-	typeof(p) ________p1 = READ_ONCE(p); \
-	((typeof(*p) __force __kernel *)(________p1)); \
+	typeof(p) local = READ_ONCE(p); \
+	((typeof(*p) __force __kernel *)(local)); \
 })
+#define rcu_dereference_raw(p) __rcu_dereference_raw(p, __UNIQUE_ID(rcu))
 
 /**
  * RCU_INITIALIZER() - statically initialize an RCU-protected global variable
@@ -490,7 +492,7 @@ do {									      \
  * when tearing down multi-linked structures after a grace period
  * has elapsed.
  */
-#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu)
+#define rcu_access_pointer(p) __rcu_access_pointer((p), __UNIQUE_ID(rcu), __rcu)
 
 /**
  * rcu_dereference_check() - rcu_dereference with debug checking
@@ -526,7 +528,8 @@ do {									      \
  * annotated as __rcu.
  */
 #define rcu_dereference_check(p, c) \
-	__rcu_dereference_check((p), (c) || rcu_read_lock_held(), __rcu)
+	__rcu_dereference_check((p), __UNIQUE_ID(rcu), \
+				(c) || rcu_read_lock_held(), __rcu)
 
 /**
  * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking
@@ -541,7 +544,8 @@ do {									      \
  * rcu_read_lock() but also rcu_read_lock_bh() into account.
  */
 #define rcu_dereference_bh_check(p, c) \
-	__rcu_dereference_check((p), (c) || rcu_read_lock_bh_held(), __rcu)
+	__rcu_dereference_check((p), __UNIQUE_ID(rcu), \
+				(c) || rcu_read_lock_bh_held(), __rcu)
 
 /**
  * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking
@@ -556,7 +560,8 @@ do {									      \
  * only rcu_read_lock() but also rcu_read_lock_sched() into account.
  */
 #define rcu_dereference_sched_check(p, c) \
-	__rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \
+	__rcu_dereference_check((p), __UNIQUE_ID(rcu), \
+				(c) || rcu_read_lock_sched_held(), \
 				__rcu)
 
 /*
@@ -566,7 +571,8 @@ do {									      \
  * The no-tracing version of rcu_dereference_raw() must not call
  * rcu_read_lock_held().
  */
-#define rcu_dereference_raw_check(p) __rcu_dereference_check((p), 1, __rcu)
+#define rcu_dereference_raw_check(p) \
+	__rcu_dereference_check((p), __UNIQUE_ID(rcu), 1, __rcu)
 
 /**
  * rcu_dereference_protected() - fetch RCU pointer when updates prevented
@@ -585,7 +591,7 @@ do {									      \
  * but very ugly failures.
  */
 #define rcu_dereference_protected(p, c) \
-	__rcu_dereference_protected((p), (c), __rcu)
+	__rcu_dereference_protected((p), __UNIQUE_ID(rcu), (c), __rcu)
 
 
 /**
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index e6011a9975af..01226e4d960a 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -117,7 +117,8 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp)
  * lockdep_is_held() calls.
  */
 #define srcu_dereference_check(p, ssp, c) \
-	__rcu_dereference_check((p), (c) || srcu_read_lock_held(ssp), __rcu)
+	__rcu_dereference_check((p), __UNIQUE_ID(rcu), \
+				(c) || srcu_read_lock_held(ssp), __rcu)
 
 /**
  * srcu_dereference - fetch SRCU-protected pointer for later dereferencing
-- 
cgit v1.2.3


From 2407a64f8045552203ee5cb9904ce75ce2fceef4 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Tue, 28 Sep 2021 08:21:28 +0800
Subject: rcu: in_irq() cleanup

This commit replaces the obsolete and ambiguous macro in_irq() with its
shiny new in_hardirq() equivalent.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcutiny.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 9be015305f9f..858f4d429946 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -85,7 +85,7 @@ static inline void rcu_irq_enter_irqson(void) { }
 static inline void rcu_irq_exit(void) { }
 static inline void rcu_irq_exit_check_preempt(void) { }
 #define rcu_is_idle_cpu(cpu) \
-	(is_idle_task(current) && !in_nmi() && !in_irq() && !in_serving_softirq())
+	(is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq())
 static inline void exit_rcu(void) { }
 static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t)
 {
-- 
cgit v1.2.3


From 502e82b91361955c66c8453b5b7a905b0b5bd5a1 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Sun, 7 Nov 2021 17:21:45 +0200
Subject: net/mlx5: Fix access to a non-supported register

Validate MRTC register is supported before triggering a delayed work
which accesses it.

Fixes: 5a1023deeed0 ("net/mlx5: Add periodic update of host time to firmware")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/mlx5_ifc.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 3636df90899a..fbaab440a484 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -9698,7 +9698,10 @@ struct mlx5_ifc_mcam_access_reg_bits {
 	u8         regs_84_to_68[0x11];
 	u8         tracer_registers[0x4];
 
-	u8         regs_63_to_32[0x20];
+	u8         regs_63_to_46[0x12];
+	u8         mrtc[0x1];
+	u8         regs_44_to_32[0xd];
+
 	u8         regs_31_to_0[0x20];
 };
 
-- 
cgit v1.2.3


From af3bf054661fb11497a7f712ece8b838521227a4 Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@gmail.com>
Date: Wed, 1 Dec 2021 01:17:36 +0000
Subject: cgroup: fix a typo in comment

In commit 8699b7762a62 ("cgroup: s/child_subsys_mask/subtree_ss_mask/"),
we rename child_subsys_mask to subtree_ss_mask. While it missed to
rename this in comment.

Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup-defs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index db2e147e069f..bb1e79791ed5 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -413,7 +413,7 @@ struct cgroup {
 	/*
 	 * The bitmask of subsystems enabled on the child cgroups.
 	 * ->subtree_control is the one configured through
-	 * "cgroup.subtree_control" while ->child_ss_mask is the effective
+	 * "cgroup.subtree_control" while ->subtree_ss_mask is the effective
 	 * one which may have more subsystems enabled.  Controller knobs
 	 * are made available iff it's enabled in ->subtree_control.
 	 */
-- 
cgit v1.2.3


From 2696f9010d21aee60be06b2135806e11c79ded8b Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Wed, 10 Nov 2021 09:50:34 -0500
Subject: drm/ttm: Clarify that the TTM_PL_SYSTEM is under TTMs control
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

TTM takes full control over TTM_PL_SYSTEM placed buffers. This makes
driver internal usage of TTM_PL_SYSTEM prone to errors because it
requires the drivers to manually handle all interactions between TTM
which can swap out those buffers whenever it thinks it's the right
thing to do and driver.

CPU buffers which need to be fenced and shared with accelerators should
be placed in driver specific placements that can explicitly handle
CPU/accelerator buffer fencing.
Currently, apart, from things silently failing nothing is enforcing
that requirement which means that it's easy for drivers and new
developers to get this wrong. To avoid the confusion we can document
this requirement and clarify the solution.

This came up during a discussion on dri-devel:
https://lore.kernel.org/dri-devel/232f45e9-8748-1243-09bf-56763e6668b3@amd.com

Signed-off-by: Zack Rusin <zackr@vmware.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211110145034.487512-1-zackr@vmware.com
---
 include/drm/ttm/ttm_placement.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/drm/ttm/ttm_placement.h b/include/drm/ttm/ttm_placement.h
index 76d1b9119a2b..8074d0f6cae5 100644
--- a/include/drm/ttm/ttm_placement.h
+++ b/include/drm/ttm/ttm_placement.h
@@ -35,6 +35,17 @@
 
 /*
  * Memory regions for data placement.
+ *
+ * Buffers placed in TTM_PL_SYSTEM are considered under TTMs control and can
+ * be swapped out whenever TTMs thinks it is a good idea.
+ * In cases where drivers would like to use TTM_PL_SYSTEM as a valid
+ * placement they need to be able to handle the issues that arise due to the
+ * above manually.
+ *
+ * For BO's which reside in system memory but for which the accelerator
+ * requires direct access (i.e. their usage needs to be synchronized
+ * between the CPU and accelerator via fences) a new, driver private
+ * placement that can handle such scenarios is a good idea.
  */
 
 #define TTM_PL_SYSTEM           0
-- 
cgit v1.2.3


From e14da77113bb890d7bf9e5d17031bdd476a7ce5e Mon Sep 17 00:00:00 2001
From: William Kucharski <william.kucharski@oracle.com>
Date: Wed, 1 Dec 2021 09:56:58 -0700
Subject: cgroup: Trace event cgroup id fields should be u64

Various trace event fields that store cgroup IDs were declared as
ints, but cgroup_id(() returns a u64 and the structures and associated
TP_printk() calls were not updated to reflect this.

Fixes: 743210386c03 ("cgroup: use cgrp->kn->id as the cgroup ID")
Signed-off-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/trace/events/cgroup.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
index 7f42a3de59e6..dd7d7c9efecd 100644
--- a/include/trace/events/cgroup.h
+++ b/include/trace/events/cgroup.h
@@ -59,8 +59,8 @@ DECLARE_EVENT_CLASS(cgroup,
 
 	TP_STRUCT__entry(
 		__field(	int,		root			)
-		__field(	int,		id			)
 		__field(	int,		level			)
+		__field(	u64,		id			)
 		__string(	path,		path			)
 	),
 
@@ -71,7 +71,7 @@ DECLARE_EVENT_CLASS(cgroup,
 		__assign_str(path, path);
 	),
 
-	TP_printk("root=%d id=%d level=%d path=%s",
+	TP_printk("root=%d id=%llu level=%d path=%s",
 		  __entry->root, __entry->id, __entry->level, __get_str(path))
 );
 
@@ -126,8 +126,8 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
 
 	TP_STRUCT__entry(
 		__field(	int,		dst_root		)
-		__field(	int,		dst_id			)
 		__field(	int,		dst_level		)
+		__field(	u64,		dst_id			)
 		__field(	int,		pid			)
 		__string(	dst_path,	path			)
 		__string(	comm,		task->comm		)
@@ -142,7 +142,7 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
 		__assign_str(comm, task->comm);
 	),
 
-	TP_printk("dst_root=%d dst_id=%d dst_level=%d dst_path=%s pid=%d comm=%s",
+	TP_printk("dst_root=%d dst_id=%llu dst_level=%d dst_path=%s pid=%d comm=%s",
 		  __entry->dst_root, __entry->dst_id, __entry->dst_level,
 		  __get_str(dst_path), __entry->pid, __get_str(comm))
 );
@@ -171,8 +171,8 @@ DECLARE_EVENT_CLASS(cgroup_event,
 
 	TP_STRUCT__entry(
 		__field(	int,		root			)
-		__field(	int,		id			)
 		__field(	int,		level			)
+		__field(	u64,		id			)
 		__string(	path,		path			)
 		__field(	int,		val			)
 	),
@@ -185,7 +185,7 @@ DECLARE_EVENT_CLASS(cgroup_event,
 		__entry->val = val;
 	),
 
-	TP_printk("root=%d id=%d level=%d path=%s val=%d",
+	TP_printk("root=%d id=%llu level=%d path=%s val=%d",
 		  __entry->root, __entry->id, __entry->level, __get_str(path),
 		  __entry->val)
 );
-- 
cgit v1.2.3


From 1a68b346a2c9969c05e80a3b99a9ab160b5655c0 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 22 Nov 2021 18:05:31 +0100
Subject: ACPI: Change acpi_device_always_present() into
 acpi_device_override_status()

Currently, acpi_bus_get_status() calls acpi_device_always_present() to
allow platform quirks to override the _STA return to report that a
device is present (status = ACPI_STA_DEFAULT) independent of the _STA
return.

In some cases it might also be useful to have the opposite functionality
and have a platform quirk which marks a device as not present (status = 0)
to work around ACPI table bugs.

Change acpi_device_always_present() into a more generic
acpi_device_override_status() function to allow this.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpi_bus.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 480f9207a4c6..d6fe27b695c3 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -613,9 +613,10 @@ int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state);
 int acpi_disable_wakeup_device_power(struct acpi_device *dev);
 
 #ifdef CONFIG_X86
-bool acpi_device_always_present(struct acpi_device *adev);
+bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status);
 #else
-static inline bool acpi_device_always_present(struct acpi_device *adev)
+static inline bool acpi_device_override_status(struct acpi_device *adev,
+					       unsigned long long *status)
 {
 	return false;
 }
-- 
cgit v1.2.3


From 6bbfa44116689469267f1a6e3d233b52114139d2 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 1 Dec 2021 23:45:50 +0900
Subject: kprobes: Limit max data_size of the kretprobe instances

The 'kprobe::data_size' is unsigned, thus it can not be negative.  But if
user sets it enough big number (e.g. (size_t)-8), the result of 'data_size
+ sizeof(struct kretprobe_instance)' becomes smaller than sizeof(struct
kretprobe_instance) or zero. In result, the kretprobe_instance are
allocated without enough memory, and kretprobe accesses outside of
allocated memory.

To avoid this issue, introduce a max limitation of the
kretprobe::data_size. 4KB per instance should be OK.

Link: https://lkml.kernel.org/r/163836995040.432120.10322772773821182925.stgit@devnote2

Cc: stable@vger.kernel.org
Fixes: f47cd9b553aa ("kprobes: kretprobe user entry-handler")
Reported-by: zhangyue <zhangyue1@kylinos.cn>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/kprobes.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index e974caf39d3e..8c8f7a4d93af 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -153,6 +153,8 @@ struct kretprobe {
 	struct kretprobe_holder *rph;
 };
 
+#define KRETPROBE_MAX_DATA_SIZE	4096
+
 struct kretprobe_instance {
 	union {
 		struct freelist_node freelist;
-- 
cgit v1.2.3


From 072eea6c22b2af680c3949e64f9adde278c71e68 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Tue, 30 Nov 2021 13:10:01 +0000
Subject: net: dsa: replace phylink_get_interfaces() with phylink_get_caps()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phylink needs slightly more information than phylink_get_interfaces()
allows us to get from the DSA drivers - we need the MAC capabilities.
Replace the phylink_get_interfaces() method with phylink_get_caps() to
allow DSA drivers to fill in the phylink_config MAC capabilities field
as well.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dsa.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index eff5c44ba377..8ca9d50cbbc2 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -645,8 +645,8 @@ struct dsa_switch_ops {
 	/*
 	 * PHYLINK integration
 	 */
-	void	(*phylink_get_interfaces)(struct dsa_switch *ds, int port,
-					  unsigned long *supported_interfaces);
+	void	(*phylink_get_caps)(struct dsa_switch *ds, int port,
+				    struct phylink_config *config);
 	void	(*phylink_validate)(struct dsa_switch *ds, int port,
 				    unsigned long *supported,
 				    struct phylink_link_state *state);
-- 
cgit v1.2.3


From ce8299b6f76f28326fedce2b4da90888bd97eab2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 30 Nov 2021 19:32:46 -0800
Subject: Revert "net: snmp: add statistics for tcp small queue check"

This reverts commit aeeecb889165617a841e939117f9a8095d0e7d80.

The new SNMP variable (TCPSmallQueueFailure) can be incremented
for good reasons, even on a 100Gbit single TCP_STREAM flow.

If we really wanted to ease driver debugging [1], this would
require something more sophisticated.

[1] Usually, if a driver is delaying TX completions too much,
this can lead to stalls in TCP output. Various work arounds
have been used in the past, like skb_orphan() in ndo_start_xmit().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Menglong Dong <imagedong@tencent.com>
Link: https://lore.kernel.org/r/20211201033246.2826224-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/snmp.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index e32ec6932e82..904909d020e2 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -292,7 +292,6 @@ enum
 	LINUX_MIB_TCPDSACKIGNOREDDUBIOUS,	/* TCPDSACKIgnoredDubious */
 	LINUX_MIB_TCPMIGRATEREQSUCCESS,		/* TCPMigrateReqSuccess */
 	LINUX_MIB_TCPMIGRATEREQFAILURE,		/* TCPMigrateReqFailure */
-	LINUX_MIB_TCPSMALLQUEUEFAILURE,		/* TCPSmallQueueFailure */
 	__LINUX_MIB_MAX
 };
 
-- 
cgit v1.2.3


From 7a10d8c810cfad3e79372d7d1c77899d86cd6662 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 30 Nov 2021 09:01:55 -0800
Subject: net: annotate data-races on txq->xmit_lock_owner

syzbot found that __dev_queue_xmit() is reading txq->xmit_lock_owner
without annotations.

No serious issue there, let's document what is happening there.

BUG: KCSAN: data-race in __dev_queue_xmit / __dev_queue_xmit

write to 0xffff888139d09484 of 4 bytes by interrupt on cpu 0:
 __netif_tx_unlock include/linux/netdevice.h:4437 [inline]
 __dev_queue_xmit+0x948/0xf70 net/core/dev.c:4229
 dev_queue_xmit_accel+0x19/0x20 net/core/dev.c:4265
 macvlan_queue_xmit drivers/net/macvlan.c:543 [inline]
 macvlan_start_xmit+0x2b3/0x3d0 drivers/net/macvlan.c:567
 __netdev_start_xmit include/linux/netdevice.h:4987 [inline]
 netdev_start_xmit include/linux/netdevice.h:5001 [inline]
 xmit_one+0x105/0x2f0 net/core/dev.c:3590
 dev_hard_start_xmit+0x72/0x120 net/core/dev.c:3606
 sch_direct_xmit+0x1b2/0x7c0 net/sched/sch_generic.c:342
 __dev_xmit_skb+0x83d/0x1370 net/core/dev.c:3817
 __dev_queue_xmit+0x590/0xf70 net/core/dev.c:4194
 dev_queue_xmit+0x13/0x20 net/core/dev.c:4259
 neigh_hh_output include/net/neighbour.h:511 [inline]
 neigh_output include/net/neighbour.h:525 [inline]
 ip6_finish_output2+0x995/0xbb0 net/ipv6/ip6_output.c:126
 __ip6_finish_output net/ipv6/ip6_output.c:191 [inline]
 ip6_finish_output+0x444/0x4c0 net/ipv6/ip6_output.c:201
 NF_HOOK_COND include/linux/netfilter.h:296 [inline]
 ip6_output+0x10e/0x210 net/ipv6/ip6_output.c:224
 dst_output include/net/dst.h:450 [inline]
 NF_HOOK include/linux/netfilter.h:307 [inline]
 ndisc_send_skb+0x486/0x610 net/ipv6/ndisc.c:508
 ndisc_send_rs+0x3b0/0x3e0 net/ipv6/ndisc.c:702
 addrconf_rs_timer+0x370/0x540 net/ipv6/addrconf.c:3898
 call_timer_fn+0x2e/0x240 kernel/time/timer.c:1421
 expire_timers+0x116/0x240 kernel/time/timer.c:1466
 __run_timers+0x368/0x410 kernel/time/timer.c:1734
 run_timer_softirq+0x2e/0x60 kernel/time/timer.c:1747
 __do_softirq+0x158/0x2de kernel/softirq.c:558
 __irq_exit_rcu kernel/softirq.c:636 [inline]
 irq_exit_rcu+0x37/0x70 kernel/softirq.c:648
 sysvec_apic_timer_interrupt+0x3e/0xb0 arch/x86/kernel/apic/apic.c:1097
 asm_sysvec_apic_timer_interrupt+0x12/0x20

read to 0xffff888139d09484 of 4 bytes by interrupt on cpu 1:
 __dev_queue_xmit+0x5e3/0xf70 net/core/dev.c:4213
 dev_queue_xmit_accel+0x19/0x20 net/core/dev.c:4265
 macvlan_queue_xmit drivers/net/macvlan.c:543 [inline]
 macvlan_start_xmit+0x2b3/0x3d0 drivers/net/macvlan.c:567
 __netdev_start_xmit include/linux/netdevice.h:4987 [inline]
 netdev_start_xmit include/linux/netdevice.h:5001 [inline]
 xmit_one+0x105/0x2f0 net/core/dev.c:3590
 dev_hard_start_xmit+0x72/0x120 net/core/dev.c:3606
 sch_direct_xmit+0x1b2/0x7c0 net/sched/sch_generic.c:342
 __dev_xmit_skb+0x83d/0x1370 net/core/dev.c:3817
 __dev_queue_xmit+0x590/0xf70 net/core/dev.c:4194
 dev_queue_xmit+0x13/0x20 net/core/dev.c:4259
 neigh_resolve_output+0x3db/0x410 net/core/neighbour.c:1523
 neigh_output include/net/neighbour.h:527 [inline]
 ip6_finish_output2+0x9be/0xbb0 net/ipv6/ip6_output.c:126
 __ip6_finish_output net/ipv6/ip6_output.c:191 [inline]
 ip6_finish_output+0x444/0x4c0 net/ipv6/ip6_output.c:201
 NF_HOOK_COND include/linux/netfilter.h:296 [inline]
 ip6_output+0x10e/0x210 net/ipv6/ip6_output.c:224
 dst_output include/net/dst.h:450 [inline]
 NF_HOOK include/linux/netfilter.h:307 [inline]
 ndisc_send_skb+0x486/0x610 net/ipv6/ndisc.c:508
 ndisc_send_rs+0x3b0/0x3e0 net/ipv6/ndisc.c:702
 addrconf_rs_timer+0x370/0x540 net/ipv6/addrconf.c:3898
 call_timer_fn+0x2e/0x240 kernel/time/timer.c:1421
 expire_timers+0x116/0x240 kernel/time/timer.c:1466
 __run_timers+0x368/0x410 kernel/time/timer.c:1734
 run_timer_softirq+0x2e/0x60 kernel/time/timer.c:1747
 __do_softirq+0x158/0x2de kernel/softirq.c:558
 __irq_exit_rcu kernel/softirq.c:636 [inline]
 irq_exit_rcu+0x37/0x70 kernel/softirq.c:648
 sysvec_apic_timer_interrupt+0x8d/0xb0 arch/x86/kernel/apic/apic.c:1097
 asm_sysvec_apic_timer_interrupt+0x12/0x20
 kcsan_setup_watchpoint+0x94/0x420 kernel/kcsan/core.c:443
 folio_test_anon include/linux/page-flags.h:581 [inline]
 PageAnon include/linux/page-flags.h:586 [inline]
 zap_pte_range+0x5ac/0x10e0 mm/memory.c:1347
 zap_pmd_range mm/memory.c:1467 [inline]
 zap_pud_range mm/memory.c:1496 [inline]
 zap_p4d_range mm/memory.c:1517 [inline]
 unmap_page_range+0x2dc/0x3d0 mm/memory.c:1538
 unmap_single_vma+0x157/0x210 mm/memory.c:1583
 unmap_vmas+0xd0/0x180 mm/memory.c:1615
 exit_mmap+0x23d/0x470 mm/mmap.c:3170
 __mmput+0x27/0x1b0 kernel/fork.c:1113
 mmput+0x3d/0x50 kernel/fork.c:1134
 exit_mm+0xdb/0x170 kernel/exit.c:507
 do_exit+0x608/0x17a0 kernel/exit.c:819
 do_group_exit+0xce/0x180 kernel/exit.c:929
 get_signal+0xfc3/0x1550 kernel/signal.c:2852
 arch_do_signal_or_restart+0x8c/0x2e0 arch/x86/kernel/signal.c:868
 handle_signal_work kernel/entry/common.c:148 [inline]
 exit_to_user_mode_loop kernel/entry/common.c:172 [inline]
 exit_to_user_mode_prepare+0x113/0x190 kernel/entry/common.c:207
 __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline]
 syscall_exit_to_user_mode+0x20/0x40 kernel/entry/common.c:300
 do_syscall_64+0x50/0xd0 arch/x86/entry/common.c:86
 entry_SYSCALL_64_after_hwframe+0x44/0xae

value changed: 0x00000000 -> 0xffffffff

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 28712 Comm: syz-executor.0 Tainted: G        W         5.16.0-rc1-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Link: https://lore.kernel.org/r/20211130170155.2331929-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3ec42495a43a..be5cb3360b94 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4404,7 +4404,8 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
 static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
 {
 	spin_lock(&txq->_xmit_lock);
-	txq->xmit_lock_owner = cpu;
+	/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+	WRITE_ONCE(txq->xmit_lock_owner, cpu);
 }
 
 static inline bool __netif_tx_acquire(struct netdev_queue *txq)
@@ -4421,26 +4422,32 @@ static inline void __netif_tx_release(struct netdev_queue *txq)
 static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
 {
 	spin_lock_bh(&txq->_xmit_lock);
-	txq->xmit_lock_owner = smp_processor_id();
+	/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+	WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id());
 }
 
 static inline bool __netif_tx_trylock(struct netdev_queue *txq)
 {
 	bool ok = spin_trylock(&txq->_xmit_lock);
-	if (likely(ok))
-		txq->xmit_lock_owner = smp_processor_id();
+
+	if (likely(ok)) {
+		/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+		WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id());
+	}
 	return ok;
 }
 
 static inline void __netif_tx_unlock(struct netdev_queue *txq)
 {
-	txq->xmit_lock_owner = -1;
+	/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+	WRITE_ONCE(txq->xmit_lock_owner, -1);
 	spin_unlock(&txq->_xmit_lock);
 }
 
 static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
 {
-	txq->xmit_lock_owner = -1;
+	/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+	WRITE_ONCE(txq->xmit_lock_owner, -1);
 	spin_unlock_bh(&txq->_xmit_lock);
 }
 
-- 
cgit v1.2.3


From a37a0ee4d25c152a087c5e913b76f207eaebdb54 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 30 Nov 2021 10:29:39 -0800
Subject: net: avoid uninit-value from tcp_conn_request

A recent change triggers a KMSAN warning, because request
sockets do not initialize @sk_rx_queue_mapping field.

Add sk_rx_queue_update() helper to make our intent clear.

BUG: KMSAN: uninit-value in sk_rx_queue_set include/net/sock.h:1922 [inline]
BUG: KMSAN: uninit-value in tcp_conn_request+0x3bcc/0x4dc0 net/ipv4/tcp_input.c:6922
 sk_rx_queue_set include/net/sock.h:1922 [inline]
 tcp_conn_request+0x3bcc/0x4dc0 net/ipv4/tcp_input.c:6922
 tcp_v4_conn_request+0x218/0x2a0 net/ipv4/tcp_ipv4.c:1528
 tcp_rcv_state_process+0x2c5/0x3290 net/ipv4/tcp_input.c:6406
 tcp_v4_do_rcv+0xb4e/0x1330 net/ipv4/tcp_ipv4.c:1738
 tcp_v4_rcv+0x468d/0x4ed0 net/ipv4/tcp_ipv4.c:2100
 ip_protocol_deliver_rcu+0x760/0x10b0 net/ipv4/ip_input.c:204
 ip_local_deliver_finish net/ipv4/ip_input.c:231 [inline]
 NF_HOOK include/linux/netfilter.h:307 [inline]
 ip_local_deliver+0x584/0x8c0 net/ipv4/ip_input.c:252
 dst_input include/net/dst.h:460 [inline]
 ip_sublist_rcv_finish net/ipv4/ip_input.c:551 [inline]
 ip_list_rcv_finish net/ipv4/ip_input.c:601 [inline]
 ip_sublist_rcv+0x11fd/0x1520 net/ipv4/ip_input.c:609
 ip_list_rcv+0x95f/0x9a0 net/ipv4/ip_input.c:644
 __netif_receive_skb_list_ptype net/core/dev.c:5505 [inline]
 __netif_receive_skb_list_core+0xe34/0x1240 net/core/dev.c:5553
 __netif_receive_skb_list+0x7fc/0x960 net/core/dev.c:5605
 netif_receive_skb_list_internal+0x868/0xde0 net/core/dev.c:5696
 gro_normal_list net/core/dev.c:5850 [inline]
 napi_complete_done+0x579/0xdd0 net/core/dev.c:6587
 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline]
 virtnet_poll+0x17b6/0x2350 drivers/net/virtio_net.c:1557
 __napi_poll+0x14e/0xbc0 net/core/dev.c:7020
 napi_poll net/core/dev.c:7087 [inline]
 net_rx_action+0x824/0x1880 net/core/dev.c:7174
 __do_softirq+0x1fe/0x7eb kernel/softirq.c:558
 invoke_softirq+0xa4/0x130 kernel/softirq.c:432
 __irq_exit_rcu kernel/softirq.c:636 [inline]
 irq_exit_rcu+0x76/0x130 kernel/softirq.c:648
 common_interrupt+0xb6/0xd0 arch/x86/kernel/irq.c:240
 asm_common_interrupt+0x1e/0x40
 smap_restore arch/x86/include/asm/smap.h:67 [inline]
 get_shadow_origin_ptr mm/kmsan/instrumentation.c:31 [inline]
 __msan_metadata_ptr_for_load_1+0x28/0x30 mm/kmsan/instrumentation.c:63
 tomoyo_check_acl+0x1b0/0x630 security/tomoyo/domain.c:173
 tomoyo_path_permission security/tomoyo/file.c:586 [inline]
 tomoyo_check_open_permission+0x61f/0xe10 security/tomoyo/file.c:777
 tomoyo_file_open+0x24f/0x2d0 security/tomoyo/tomoyo.c:311
 security_file_open+0xb1/0x1f0 security/security.c:1635
 do_dentry_open+0x4e4/0x1bf0 fs/open.c:809
 vfs_open+0xaf/0xe0 fs/open.c:957
 do_open fs/namei.c:3426 [inline]
 path_openat+0x52f1/0x5dd0 fs/namei.c:3559
 do_filp_open+0x306/0x760 fs/namei.c:3586
 do_sys_openat2+0x263/0x8f0 fs/open.c:1212
 do_sys_open fs/open.c:1228 [inline]
 __do_sys_open fs/open.c:1236 [inline]
 __se_sys_open fs/open.c:1232 [inline]
 __x64_sys_open+0x314/0x380 fs/open.c:1232
 do_syscall_x64 arch/x86/entry/common.c:51 [inline]
 do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Uninit was created at:
 __alloc_pages+0xbc7/0x10a0 mm/page_alloc.c:5409
 alloc_pages+0x8a5/0xb80
 alloc_slab_page mm/slub.c:1810 [inline]
 allocate_slab+0x287/0x1c20 mm/slub.c:1947
 new_slab mm/slub.c:2010 [inline]
 ___slab_alloc+0xbdf/0x1e90 mm/slub.c:3039
 __slab_alloc mm/slub.c:3126 [inline]
 slab_alloc_node mm/slub.c:3217 [inline]
 slab_alloc mm/slub.c:3259 [inline]
 kmem_cache_alloc+0xbb3/0x11c0 mm/slub.c:3264
 reqsk_alloc include/net/request_sock.h:91 [inline]
 inet_reqsk_alloc+0xaf/0x8b0 net/ipv4/tcp_input.c:6712
 tcp_conn_request+0x910/0x4dc0 net/ipv4/tcp_input.c:6852
 tcp_v4_conn_request+0x218/0x2a0 net/ipv4/tcp_ipv4.c:1528
 tcp_rcv_state_process+0x2c5/0x3290 net/ipv4/tcp_input.c:6406
 tcp_v4_do_rcv+0xb4e/0x1330 net/ipv4/tcp_ipv4.c:1738
 tcp_v4_rcv+0x468d/0x4ed0 net/ipv4/tcp_ipv4.c:2100
 ip_protocol_deliver_rcu+0x760/0x10b0 net/ipv4/ip_input.c:204
 ip_local_deliver_finish net/ipv4/ip_input.c:231 [inline]
 NF_HOOK include/linux/netfilter.h:307 [inline]
 ip_local_deliver+0x584/0x8c0 net/ipv4/ip_input.c:252
 dst_input include/net/dst.h:460 [inline]
 ip_sublist_rcv_finish net/ipv4/ip_input.c:551 [inline]
 ip_list_rcv_finish net/ipv4/ip_input.c:601 [inline]
 ip_sublist_rcv+0x11fd/0x1520 net/ipv4/ip_input.c:609
 ip_list_rcv+0x95f/0x9a0 net/ipv4/ip_input.c:644
 __netif_receive_skb_list_ptype net/core/dev.c:5505 [inline]
 __netif_receive_skb_list_core+0xe34/0x1240 net/core/dev.c:5553
 __netif_receive_skb_list+0x7fc/0x960 net/core/dev.c:5605
 netif_receive_skb_list_internal+0x868/0xde0 net/core/dev.c:5696
 gro_normal_list net/core/dev.c:5850 [inline]
 napi_complete_done+0x579/0xdd0 net/core/dev.c:6587
 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline]
 virtnet_poll+0x17b6/0x2350 drivers/net/virtio_net.c:1557
 __napi_poll+0x14e/0xbc0 net/core/dev.c:7020
 napi_poll net/core/dev.c:7087 [inline]
 net_rx_action+0x824/0x1880 net/core/dev.c:7174
 __do_softirq+0x1fe/0x7eb kernel/softirq.c:558

Fixes: 342159ee394d ("net: avoid dirtying sk->sk_rx_queue_mapping")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Link: https://lore.kernel.org/r/20211130182939.2584764-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/busy_poll.h |  2 +-
 include/net/sock.h      | 17 +++++++++++++++--
 2 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 4202c609bb0b..7994455ec714 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -133,7 +133,7 @@ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
 	if (unlikely(READ_ONCE(sk->sk_napi_id) != skb->napi_id))
 		WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
 #endif
-	sk_rx_queue_set(sk, skb);
+	sk_rx_queue_update(sk, skb);
 }
 
 static inline void __sk_mark_napi_id_once(struct sock *sk, unsigned int napi_id)
diff --git a/include/net/sock.h b/include/net/sock.h
index 715cdb4b2b79..bea21ff70e74 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1913,18 +1913,31 @@ static inline int sk_tx_queue_get(const struct sock *sk)
 	return -1;
 }
 
-static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb)
+static inline void __sk_rx_queue_set(struct sock *sk,
+				     const struct sk_buff *skb,
+				     bool force_set)
 {
 #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
 	if (skb_rx_queue_recorded(skb)) {
 		u16 rx_queue = skb_get_rx_queue(skb);
 
-		if (unlikely(READ_ONCE(sk->sk_rx_queue_mapping) != rx_queue))
+		if (force_set ||
+		    unlikely(READ_ONCE(sk->sk_rx_queue_mapping) != rx_queue))
 			WRITE_ONCE(sk->sk_rx_queue_mapping, rx_queue);
 	}
 #endif
 }
 
+static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb)
+{
+	__sk_rx_queue_set(sk, skb, true);
+}
+
+static inline void sk_rx_queue_update(struct sock *sk, const struct sk_buff *skb)
+{
+	__sk_rx_queue_set(sk, skb, false);
+}
+
 static inline void sk_rx_queue_clear(struct sock *sk)
 {
 #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
-- 
cgit v1.2.3


From 57b2b72ac1fc5d55cf3b13207942c109f1a65cb5 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Mon, 29 Nov 2021 09:57:59 -0700
Subject: mm, slab: Remove compiler check in __kmalloc_index

The minimum supported version of LLVM has been raised to 11.0.0, meaning
this check is always true, so it can be dropped.

Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 include/linux/slab.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 181045148b06..d3fb5ac71c24 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -411,8 +411,7 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
 	if (size <=  16 * 1024 * 1024) return 24;
 	if (size <=  32 * 1024 * 1024) return 25;
 
-	if ((IS_ENABLED(CONFIG_CC_IS_GCC) || CONFIG_CLANG_VERSION >= 110000)
-	    && !IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant)
+	if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant)
 		BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()");
 	else
 		BUG();
-- 
cgit v1.2.3


From 4ff22f487f8c26b99cbe1678344595734c001a39 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 30 Nov 2021 10:52:55 +0100
Subject: drm: Return error codes from struct drm_driver.gem_create_object

GEM helper libraries use struct drm_driver.gem_create_object to let
drivers override GEM object allocation. On failure, the call returns
NULL.

Change the semantics to make the calls return a pointer-encoded error.
This aligns the callback with its callers. Fixes the ingenic driver,
which already returns an error pointer.

Also update the callers to handle the involved types more strictly.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Maxime Ripard <maxime@cerno.tech>
Link: https://patchwork.freedesktop.org/patch/msgid/20211130095255.26710-1-tzimmermann@suse.de
---
 include/drm/drm_drv.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index da0c836fe8e1..f6159acb8856 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -291,8 +291,9 @@ struct drm_driver {
 	/**
 	 * @gem_create_object: constructor for gem objects
 	 *
-	 * Hook for allocating the GEM object struct, for use by the CMA and
-	 * SHMEM GEM helpers.
+	 * Hook for allocating the GEM object struct, for use by the CMA
+	 * and SHMEM GEM helpers. Returns a GEM object on success, or an
+	 * ERR_PTR()-encoded error code otherwise.
 	 */
 	struct drm_gem_object *(*gem_create_object)(struct drm_device *dev,
 						    size_t size);
-- 
cgit v1.2.3


From 213f5f8f31f10aa1e83187ae20fb7fa4e626b724 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 1 Dec 2021 18:26:35 -0800
Subject: ipv4: convert fib_num_tclassid_users to atomic_t

Before commit faa041a40b9f ("ipv4: Create cleanup helper for fib_nh")
changes to net->ipv4.fib_num_tclassid_users were protected by RTNL.

After the change, this is no longer the case, as free_fib_info_rcu()
runs after rcu grace period, without rtnl being held.

Fixes: faa041a40b9f ("ipv4: Create cleanup helper for fib_nh")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: David Ahern <dsahern@kernel.org>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h     | 2 +-
 include/net/netns/ipv4.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index ab5348e57db1..3417ba2d27ad 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -438,7 +438,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 #ifdef CONFIG_IP_ROUTE_CLASSID
 static inline int fib_num_tclassid_users(struct net *net)
 {
-	return net->ipv4.fib_num_tclassid_users;
+	return atomic_read(&net->ipv4.fib_num_tclassid_users);
 }
 #else
 static inline int fib_num_tclassid_users(struct net *net)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 2f65701a43c9..6c5b2efc4f17 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -65,7 +65,7 @@ struct netns_ipv4 {
 	bool			fib_has_custom_local_routes;
 	bool			fib_offload_disabled;
 #ifdef CONFIG_IP_ROUTE_CLASSID
-	int			fib_num_tclassid_users;
+	atomic_t		fib_num_tclassid_users;
 #endif
 	struct hlist_head	*fib_table_hash;
 	struct sock		*fibnl;
-- 
cgit v1.2.3


From 72f6a45202f20f0e1a46b0acb7803369cc53d0b8 Mon Sep 17 00:00:00 2001
From: Xiayu Zhang <Xiayu.Zhang@mediatek.com>
Date: Wed, 1 Dec 2021 10:57:13 +0800
Subject: Fix Comment of ETH_P_802_3_MIN

The description of ETH_P_802_3_MIN is misleading.
The value of EthernetType in Ethernet II frame is more than 0x0600,
the value of Length in 802.3 frame is less than 0x0600.

Signed-off-by: Xiayu Zhang <Xiayu.Zhang@mediatek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_ether.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 5da4ee234e0b..c0c2f3ed5729 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -117,7 +117,7 @@
 #define ETH_P_IFE	0xED3E		/* ForCES inter-FE LFB type */
 #define ETH_P_AF_IUCV   0xFBFB		/* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */
 
-#define ETH_P_802_3_MIN	0x0600		/* If the value in the ethernet type is less than this value
+#define ETH_P_802_3_MIN	0x0600		/* If the value in the ethernet type is more than this value
 					 * then the frame is Ethernet II. Else it is 802.3 */
 
 /*
-- 
cgit v1.2.3


From e7f2be115f0746b969c0df14c0d182f65f005ca5 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 26 Oct 2021 16:10:55 +0200
Subject: sched/cputime: Fix getrusage(RUSAGE_THREAD) with nohz_full

getrusage(RUSAGE_THREAD) with nohz_full may return shorter utime/stime
than the actual time.

task_cputime_adjusted() snapshots utime and stime and then adjust their
sum to match the scheduler maintained cputime.sum_exec_runtime.
Unfortunately in nohz_full, sum_exec_runtime is only updated once per
second in the worst case, causing a discrepancy against utime and stime
that can be updated anytime by the reader using vtime.

To fix this situation, perform an update of cputime.sum_exec_runtime
when the cputime snapshot reports the task as actually running while
the tick is disabled. The related overhead is then contained within the
relevant situations.

Reported-by: Hasegawa Hitomi <hasegawa-hitomi@fujitsu.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Hasegawa Hitomi <hasegawa-hitomi@fujitsu.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Acked-by: Phil Auld <pauld@redhat.com>
Link: https://lore.kernel.org/r/20211026141055.57358-3-frederic@kernel.org
---
 include/linux/sched/cputime.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h
index 6c9f19a33865..ce3c58286062 100644
--- a/include/linux/sched/cputime.h
+++ b/include/linux/sched/cputime.h
@@ -18,15 +18,16 @@
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-extern void task_cputime(struct task_struct *t,
+extern bool task_cputime(struct task_struct *t,
 			 u64 *utime, u64 *stime);
 extern u64 task_gtime(struct task_struct *t);
 #else
-static inline void task_cputime(struct task_struct *t,
+static inline bool task_cputime(struct task_struct *t,
 				u64 *utime, u64 *stime)
 {
 	*utime = t->utime;
 	*stime = t->stime;
+	return false;
 }
 
 static inline u64 task_gtime(struct task_struct *t)
-- 
cgit v1.2.3


From f83baa0cb6cfc92ebaf7f9d3a99d7e34f2e77a8a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 1 Dec 2021 19:35:01 +0100
Subject: HID: add hid_is_usb() function to make it simpler for USB detection

A number of HID drivers already call hid_is_using_ll_driver() but only
for the detection of if this is a USB device or not.  Make this more
obvious by creating hid_is_usb() and calling the function that way.

Also converts the existing hid_is_using_ll_driver() functions to use the
new call.

Cc: Jiri Kosina <jikos@kernel.org>
Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: linux-input@vger.kernel.org
Cc: stable@vger.kernel.org
Tested-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Link: https://lore.kernel.org/r/20211201183503.2373082-1-gregkh@linuxfoundation.org
---
 include/linux/hid.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 9e067f937dbc..f453be385bd4 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -840,6 +840,11 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev,
 	return hdev->ll_driver == driver;
 }
 
+static inline bool hid_is_usb(struct hid_device *hdev)
+{
+	return hid_is_using_ll_driver(hdev, &usb_hid_driver);
+}
+
 #define	PM_HINT_FULLON	1<<5
 #define PM_HINT_NORMAL	1<<1
 
-- 
cgit v1.2.3


From 9e3562080950b6e3fe38a5e34ddd5b1c618f2019 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Thu, 2 Dec 2021 10:53:33 +0100
Subject: HID: add suspend/resume helpers

There is a lot of duplication of code in the HID low level drivers.
Better have everything in one place so we can eventually extend it
in a generic way.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20211202095334.14399-4-benjamin.tissoires@redhat.com
---
 include/linux/hid.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 9e067f937dbc..ebe3ec98db6b 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -923,6 +923,16 @@ s32 hid_snto32(__u32 value, unsigned n);
 __u32 hid_field_extract(const struct hid_device *hid, __u8 *report,
 		     unsigned offset, unsigned n);
 
+#ifdef CONFIG_PM
+int hid_driver_suspend(struct hid_device *hdev, pm_message_t state);
+int hid_driver_reset_resume(struct hid_device *hdev);
+int hid_driver_resume(struct hid_device *hdev);
+#else
+static inline int hid_driver_suspend(struct hid_device *hdev, pm_message_t state) { return 0; }
+static inline int hid_driver_reset_resume(struct hid_device *hdev) { return 0; }
+static inline int hid_driver_resume(struct hid_device *hdev) { return 0; }
+#endif
+
 /**
  * hid_device_io_start - enable HID input during probe, remove
  *
-- 
cgit v1.2.3


From f65a0b1f3e79444bba9ac56435eeb32db85ab2c9 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Thu, 2 Dec 2021 10:53:34 +0100
Subject: HID: do not inline some hid_hw_ functions

We don't gain much by having them as inline, and it
actually prevents us to attach a probe to those helpers.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20211202095334.14399-5-benjamin.tissoires@redhat.com
---
 include/linux/hid.h | 68 +++++------------------------------------------------
 1 file changed, 6 insertions(+), 62 deletions(-)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index ebe3ec98db6b..b2fea7fc54a1 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -1066,6 +1066,12 @@ int __must_check hid_hw_start(struct hid_device *hdev,
 void hid_hw_stop(struct hid_device *hdev);
 int __must_check hid_hw_open(struct hid_device *hdev);
 void hid_hw_close(struct hid_device *hdev);
+void hid_hw_request(struct hid_device *hdev,
+		    struct hid_report *report, int reqtype);
+int hid_hw_raw_request(struct hid_device *hdev,
+		       unsigned char reportnum, __u8 *buf,
+		       size_t len, unsigned char rtype, int reqtype);
+int hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len);
 
 /**
  * hid_hw_power - requests underlying HW to go into given power mode
@@ -1083,68 +1089,6 @@ static inline int hid_hw_power(struct hid_device *hdev, int level)
 }
 
 
-/**
- * hid_hw_request - send report request to device
- *
- * @hdev: hid device
- * @report: report to send
- * @reqtype: hid request type
- */
-static inline void hid_hw_request(struct hid_device *hdev,
-				  struct hid_report *report, int reqtype)
-{
-	if (hdev->ll_driver->request)
-		return hdev->ll_driver->request(hdev, report, reqtype);
-
-	__hid_request(hdev, report, reqtype);
-}
-
-/**
- * hid_hw_raw_request - send report request to device
- *
- * @hdev: hid device
- * @reportnum: report ID
- * @buf: in/out data to transfer
- * @len: length of buf
- * @rtype: HID report type
- * @reqtype: HID_REQ_GET_REPORT or HID_REQ_SET_REPORT
- *
- * Return: count of data transferred, negative if error
- *
- * Same behavior as hid_hw_request, but with raw buffers instead.
- */
-static inline int hid_hw_raw_request(struct hid_device *hdev,
-				  unsigned char reportnum, __u8 *buf,
-				  size_t len, unsigned char rtype, int reqtype)
-{
-	if (len < 1 || len > HID_MAX_BUFFER_SIZE || !buf)
-		return -EINVAL;
-
-	return hdev->ll_driver->raw_request(hdev, reportnum, buf, len,
-						    rtype, reqtype);
-}
-
-/**
- * hid_hw_output_report - send output report to device
- *
- * @hdev: hid device
- * @buf: raw data to transfer
- * @len: length of buf
- *
- * Return: count of data transferred, negative if error
- */
-static inline int hid_hw_output_report(struct hid_device *hdev, __u8 *buf,
-					size_t len)
-{
-	if (len < 1 || len > HID_MAX_BUFFER_SIZE || !buf)
-		return -EINVAL;
-
-	if (hdev->ll_driver->output_report)
-		return hdev->ll_driver->output_report(hdev, buf, len);
-
-	return -ENOSYS;
-}
-
 /**
  * hid_hw_idle - send idle request to device
  *
-- 
cgit v1.2.3


From 8293eb995f349aed28006792cad4cb48091919dd Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 1 Dec 2021 10:10:25 -0800
Subject: bpf: Rename btf_member accessors.

Rename btf_member_bit_offset() and btf_member_bitfield_size() to
avoid conflicts with similarly named helpers in libbpf's btf.h.
Rename the kernel helpers, since libbpf helpers are part of uapi.

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211201181040.23337-3-alexei.starovoitov@gmail.com
---
 include/linux/btf.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index 203eef993d76..956f70388f69 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -194,15 +194,15 @@ static inline bool btf_type_kflag(const struct btf_type *t)
 	return BTF_INFO_KFLAG(t->info);
 }
 
-static inline u32 btf_member_bit_offset(const struct btf_type *struct_type,
-					const struct btf_member *member)
+static inline u32 __btf_member_bit_offset(const struct btf_type *struct_type,
+					  const struct btf_member *member)
 {
 	return btf_type_kflag(struct_type) ? BTF_MEMBER_BIT_OFFSET(member->offset)
 					   : member->offset;
 }
 
-static inline u32 btf_member_bitfield_size(const struct btf_type *struct_type,
-					   const struct btf_member *member)
+static inline u32 __btf_member_bitfield_size(const struct btf_type *struct_type,
+					     const struct btf_member *member)
 {
 	return btf_type_kflag(struct_type) ? BTF_MEMBER_BITFIELD_SIZE(member->offset)
 					   : 0;
-- 
cgit v1.2.3


From 29db4bea1d10b73749d7992c1fc9ac13499e8871 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 1 Dec 2021 10:10:26 -0800
Subject: bpf: Prepare relo_core.c for kernel duty.

Make relo_core.c to be compiled for the kernel and for user space libbpf.

Note the patch is reducing BPF_CORE_SPEC_MAX_LEN from 64 to 32.
This is the maximum number of nested structs and arrays.
For example:
 struct sample {
     int a;
     struct {
         int b[10];
     };
 };

 struct sample *s = ...;
 int *y = &s->b[5];
This field access is encoded as "0:1:0:5" and spec len is 4.

The follow up patch might bump it back to 64.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211201181040.23337-4-alexei.starovoitov@gmail.com
---
 include/linux/btf.h | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

(limited to 'include')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index 956f70388f69..acef6ef28768 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -144,6 +144,53 @@ static inline bool btf_type_is_enum(const struct btf_type *t)
 	return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM;
 }
 
+static inline bool str_is_empty(const char *s)
+{
+	return !s || !s[0];
+}
+
+static inline u16 btf_kind(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info);
+}
+
+static inline bool btf_is_enum(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_ENUM;
+}
+
+static inline bool btf_is_composite(const struct btf_type *t)
+{
+	u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
+}
+
+static inline bool btf_is_array(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_ARRAY;
+}
+
+static inline bool btf_is_int(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_INT;
+}
+
+static inline bool btf_is_ptr(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_PTR;
+}
+
+static inline u8 btf_int_offset(const struct btf_type *t)
+{
+	return BTF_INT_OFFSET(*(u32 *)(t + 1));
+}
+
+static inline u8 btf_int_encoding(const struct btf_type *t)
+{
+	return BTF_INT_ENCODING(*(u32 *)(t + 1));
+}
+
 static inline bool btf_type_is_scalar(const struct btf_type *t)
 {
 	return btf_type_is_int(t) || btf_type_is_enum(t);
@@ -184,6 +231,11 @@ static inline u16 btf_type_vlen(const struct btf_type *t)
 	return BTF_INFO_VLEN(t->info);
 }
 
+static inline u16 btf_vlen(const struct btf_type *t)
+{
+	return btf_type_vlen(t);
+}
+
 static inline u16 btf_func_linkage(const struct btf_type *t)
 {
 	return BTF_INFO_VLEN(t->info);
@@ -208,11 +260,40 @@ static inline u32 __btf_member_bitfield_size(const struct btf_type *struct_type,
 					   : 0;
 }
 
+static inline struct btf_member *btf_members(const struct btf_type *t)
+{
+	return (struct btf_member *)(t + 1);
+}
+
+static inline u32 btf_member_bit_offset(const struct btf_type *t, u32 member_idx)
+{
+	const struct btf_member *m = btf_members(t) + member_idx;
+
+	return __btf_member_bit_offset(t, m);
+}
+
+static inline u32 btf_member_bitfield_size(const struct btf_type *t, u32 member_idx)
+{
+	const struct btf_member *m = btf_members(t) + member_idx;
+
+	return __btf_member_bitfield_size(t, m);
+}
+
 static inline const struct btf_member *btf_type_member(const struct btf_type *t)
 {
 	return (const struct btf_member *)(t + 1);
 }
 
+static inline struct btf_array *btf_array(const struct btf_type *t)
+{
+	return (struct btf_array *)(t + 1);
+}
+
+static inline struct btf_enum *btf_enum(const struct btf_type *t)
+{
+	return (struct btf_enum *)(t + 1);
+}
+
 static inline const struct btf_var_secinfo *btf_type_var_secinfo(
 		const struct btf_type *t)
 {
-- 
cgit v1.2.3


From 46334a0cd21bed70d6f1ddef1464f75a0ebe1774 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 1 Dec 2021 10:10:27 -0800
Subject: bpf: Define enum bpf_core_relo_kind as uapi.

enum bpf_core_relo_kind is generated by llvm and processed by libbpf.
It's a de-facto uapi.
With CO-RE in the kernel the bpf_core_relo_kind values become uapi de-jure.
Also rename them with BPF_CORE_ prefix to distinguish from conflicting names in
bpf_core_read.h. The enums bpf_field_info_kind, bpf_type_id_kind,
bpf_type_info_kind, bpf_enum_value_kind are passing different values from bpf
program into llvm.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211201181040.23337-5-alexei.starovoitov@gmail.com
---
 include/uapi/linux/bpf.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 211b43afd0fb..9e66b1880020 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6374,4 +6374,23 @@ enum {
 	BTF_F_ZERO	=	(1ULL << 3),
 };
 
+/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
+ * has to be adjusted by relocations. It is emitted by llvm and passed to
+ * libbpf and later to the kernel.
+ */
+enum bpf_core_relo_kind {
+	BPF_CORE_FIELD_BYTE_OFFSET = 0,      /* field byte offset */
+	BPF_CORE_FIELD_BYTE_SIZE = 1,        /* field size in bytes */
+	BPF_CORE_FIELD_EXISTS = 2,           /* field existence in target kernel */
+	BPF_CORE_FIELD_SIGNED = 3,           /* field signedness (0 - unsigned, 1 - signed) */
+	BPF_CORE_FIELD_LSHIFT_U64 = 4,       /* bitfield-specific left bitshift */
+	BPF_CORE_FIELD_RSHIFT_U64 = 5,       /* bitfield-specific right bitshift */
+	BPF_CORE_TYPE_ID_LOCAL = 6,          /* type ID in local BPF object */
+	BPF_CORE_TYPE_ID_TARGET = 7,         /* type ID in target kernel */
+	BPF_CORE_TYPE_EXISTS = 8,            /* type existence in target kernel */
+	BPF_CORE_TYPE_SIZE = 9,              /* type size in bytes */
+	BPF_CORE_ENUMVAL_EXISTS = 10,        /* enum value existence in target kernel */
+	BPF_CORE_ENUMVAL_VALUE = 11,         /* enum value integer value */
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
-- 
cgit v1.2.3


From fbd94c7afcf99c9f3b1ba1168657ecc428eb2c8d Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 1 Dec 2021 10:10:28 -0800
Subject: bpf: Pass a set of bpf_core_relo-s to prog_load command.

struct bpf_core_relo is generated by llvm and processed by libbpf.
It's a de-facto uapi.
With CO-RE in the kernel the struct bpf_core_relo becomes uapi de-jure.
Add an ability to pass a set of 'struct bpf_core_relo' to prog_load command
and let the kernel perform CO-RE relocations.

Note the struct bpf_line_info and struct bpf_func_info have the same
layout when passed from LLVM to libbpf and from libbpf to the kernel
except "insn_off" fields means "byte offset" when LLVM generates it.
Then libbpf converts it to "insn index" to pass to the kernel.
The struct bpf_core_relo's "insn_off" field is always "byte offset".

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211201181040.23337-6-alexei.starovoitov@gmail.com
---
 include/linux/bpf.h      |  8 +++++++
 include/uapi/linux/bpf.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 66 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cad0829710be..8bbf08fbab66 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1732,6 +1732,14 @@ bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog);
 const struct btf_func_model *
 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
 			 const struct bpf_insn *insn);
+struct bpf_core_ctx {
+	struct bpf_verifier_log *log;
+	const struct btf *btf;
+};
+
+int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
+		   int relo_idx, void *insn);
+
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9e66b1880020..c26871263f1f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1342,8 +1342,10 @@ union bpf_attr {
 			/* or valid module BTF object fd or 0 to attach to vmlinux */
 			__u32		attach_btf_obj_fd;
 		};
-		__u32		:32;		/* pad */
+		__u32		core_relo_cnt;	/* number of bpf_core_relo */
 		__aligned_u64	fd_array;	/* array of FDs */
+		__aligned_u64	core_relos;
+		__u32		core_relo_rec_size; /* sizeof(struct bpf_core_relo) */
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -6393,4 +6395,59 @@ enum bpf_core_relo_kind {
 	BPF_CORE_ENUMVAL_VALUE = 11,         /* enum value integer value */
 };
 
+/*
+ * "struct bpf_core_relo" is used to pass relocation data form LLVM to libbpf
+ * and from libbpf to the kernel.
+ *
+ * CO-RE relocation captures the following data:
+ * - insn_off - instruction offset (in bytes) within a BPF program that needs
+ *   its insn->imm field to be relocated with actual field info;
+ * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
+ *   type or field;
+ * - access_str_off - offset into corresponding .BTF string section. String
+ *   interpretation depends on specific relocation kind:
+ *     - for field-based relocations, string encodes an accessed field using
+ *       a sequence of field and array indices, separated by colon (:). It's
+ *       conceptually very close to LLVM's getelementptr ([0]) instruction's
+ *       arguments for identifying offset to a field.
+ *     - for type-based relocations, strings is expected to be just "0";
+ *     - for enum value-based relocations, string contains an index of enum
+ *       value within its enum type;
+ * - kind - one of enum bpf_core_relo_kind;
+ *
+ * Example:
+ *   struct sample {
+ *       int a;
+ *       struct {
+ *           int b[10];
+ *       };
+ *   };
+ *
+ *   struct sample *s = ...;
+ *   int *x = &s->a;     // encoded as "0:0" (a is field #0)
+ *   int *y = &s->b[5];  // encoded as "0:1:0:5" (anon struct is field #1,
+ *                       // b is field #0 inside anon struct, accessing elem #5)
+ *   int *z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
+ *
+ * type_id for all relocs in this example will capture BTF type id of
+ * `struct sample`.
+ *
+ * Such relocation is emitted when using __builtin_preserve_access_index()
+ * Clang built-in, passing expression that captures field address, e.g.:
+ *
+ * bpf_probe_read(&dst, sizeof(dst),
+ *		  __builtin_preserve_access_index(&src->a.b.c));
+ *
+ * In this case Clang will emit field relocation recording necessary data to
+ * be able to find offset of embedded `a.b.c` field within `src` struct.
+ *
+ * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
+ */
+struct bpf_core_relo {
+	__u32 insn_off;
+	__u32 type_id;
+	__u32 access_str_off;
+	enum bpf_core_relo_kind kind;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
-- 
cgit v1.2.3


From d9847eb8be3d895b2b5f514fdf3885d47a0b92a2 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Mon, 22 Nov 2021 20:17:40 +0530
Subject: bpf: Make CONFIG_DEBUG_INFO_BTF depend upon CONFIG_BPF_SYSCALL

Vinicius Costa Gomes reported [0] that build fails when
CONFIG_DEBUG_INFO_BTF is enabled and CONFIG_BPF_SYSCALL is disabled.
This leads to btf.c not being compiled, and then no symbol being present
in vmlinux for the declarations in btf.h. Since BTF is not useful
without enabling BPF subsystem, disallow this combination.

However, theoretically disabling both now could still fail, as the
symbol for kfunc_btf_id_list variables is not available. This isn't a
problem as the compiler usually optimizes the whole register/unregister
call, but at lower optimization levels it can fail the build in linking
stage.

Fix that by adding dummy variables so that modules taking address of
them still work, but the whole thing is a noop.

  [0]: https://lore.kernel.org/bpf/20211110205418.332403-1-vinicius.gomes@intel.com

Fixes: 14f267d95fe4 ("bpf: btf: Introduce helpers for dynamic BTF set registration")
Reported-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20211122144742.477787-2-memxor@gmail.com
---
 include/linux/btf.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index 203eef993d76..0e1b6281fd8f 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -245,7 +245,10 @@ struct kfunc_btf_id_set {
 	struct module *owner;
 };
 
-struct kfunc_btf_id_list;
+struct kfunc_btf_id_list {
+	struct list_head list;
+	struct mutex mutex;
+};
 
 #ifdef CONFIG_DEBUG_INFO_BTF_MODULES
 void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
@@ -254,6 +257,9 @@ void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
 				 struct kfunc_btf_id_set *s);
 bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
 			      struct module *owner);
+
+extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list;
+extern struct kfunc_btf_id_list prog_test_kfunc_list;
 #else
 static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
 					     struct kfunc_btf_id_set *s)
@@ -268,13 +274,13 @@ static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist,
 {
 	return false;
 }
+
+static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused;
+static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused;
 #endif
 
 #define DEFINE_KFUNC_BTF_ID_SET(set, name)                                     \
 	struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set),     \
 					 THIS_MODULE }
 
-extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list;
-extern struct kfunc_btf_id_list prog_test_kfunc_list;
-
 #endif
-- 
cgit v1.2.3


From b247f32aecad09e6cf7edff7739e6f2c9dc5fca9 Mon Sep 17 00:00:00 2001
From: Avihai Horon <avihaih@nvidia.com>
Date: Thu, 28 Oct 2021 16:03:06 +0300
Subject: net/mlx5: Dynamically resize flow counters query buffer

The flow counters bulk query buffer is allocated once during
mlx5_fc_init_stats(). For PFs and VFs this buffer usually takes a little
more than 512KB of memory, which is aligned to the next power of 2, to
1MB. For SFs, this buffer is reduced and takes around 128 Bytes.

The buffer size determines the maximum number of flow counters that
can be queried at a time. Thus, having a bigger buffer can improve
performance for users that need to query many flow counters.

There are cases that don't use many flow counters and don't need a big
buffer (e.g. SFs, VFs). Since this size is critical with large scale,
in these cases the buffer size should be reduced.

In order to reduce memory consumption while maintaining query
performance, change the query buffer's allocation scheme to the
following:
- First allocate the buffer with small initial size.
- If the number of counters surpasses the initial size, resize the
  buffer to the maximum size.

The buffer only grows and isn't shrank, because users with many flow
counters don't care about the buffer size and we don't want to add
resize overhead if the current number of counters drops.

This solution is preferable to the current one, which is less accurate
and only addresses SFs.

Signed-off-by: Avihai Horon <avihaih@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/driver.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index a623ec635947..78655d8d13a7 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -478,6 +478,10 @@ struct mlx5_fc_stats {
 	unsigned long next_query;
 	unsigned long sampling_interval; /* jiffies */
 	u32 *bulk_query_out;
+	int bulk_query_len;
+	size_t num_counters;
+	bool bulk_query_alloc_failed;
+	unsigned long next_bulk_query_alloc;
 	struct mlx5_fc_pool fc_pool;
 };
 
-- 
cgit v1.2.3


From e2748ad5257754a47376e28c0f9dda4f5c1e5ca3 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Tue, 2 Nov 2021 16:02:00 -0600
Subject: mtd: remove unused header file <linux/mtd/latch-addr-flash.h>

Commit d24dbd7541ff ("mtd: maps: Get rid of the latch-addr-flash driver")
removed the last user of <linux/mtd/latch-addr-flash.h> but left the header
file behind.  Nothing uses this file, delete it now.

Cc: Boris Brezillon <bbrezillon@kernel.org>
Cc: Miquel Raynal <miquel.raynal@bootlin.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Vignesh Raghavendra <vigneshr@ti.com>
Cc: linux-mtd@lists.infradead.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20211102220203.940290-7-corbet@lwn.net
---
 include/linux/mtd/latch-addr-flash.h | 29 -----------------------------
 1 file changed, 29 deletions(-)
 delete mode 100644 include/linux/mtd/latch-addr-flash.h

(limited to 'include')

diff --git a/include/linux/mtd/latch-addr-flash.h b/include/linux/mtd/latch-addr-flash.h
deleted file mode 100644
index e94b8e128074..000000000000
--- a/include/linux/mtd/latch-addr-flash.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Interface for NOR flash driver whose high address lines are latched
- *
- * Copyright © 2008 MontaVista Software, Inc. <source@mvista.com>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-#ifndef __LATCH_ADDR_FLASH__
-#define __LATCH_ADDR_FLASH__
-
-struct map_info;
-struct mtd_partition;
-
-struct latch_addr_flash_data {
-	unsigned int		width;
-	unsigned int		size;
-
-	int			(*init)(void *data, int cs);
-	void			(*done)(void *data);
-	void			(*set_window)(unsigned long offset, void *data);
-	void			*data;
-
-	unsigned int		nr_parts;
-	struct mtd_partition	*parts;
-};
-
-#endif
-- 
cgit v1.2.3


From 03cfda4fa6ea9bea2f30160579a78c2b8c1e616e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 2 Dec 2021 15:37:24 -0800
Subject: tcp: fix another uninit-value (sk_rx_queue_mapping)

KMSAN is still not happy [1].

I missed that passive connections do not inherit their
sk_rx_queue_mapping values from the request socket,
but instead tcp_child_process() is calling
sk_mark_napi_id(child, skb)

We have many sk_mark_napi_id() callers, so I am providing
a new helper, forcing the setting sk_rx_queue_mapping
and sk_napi_id.

Note that we had no KMSAN report for sk_napi_id because
passive connections got a copy of this field from the listener.
sk_rx_queue_mapping in the other hand is inside the
sk_dontcopy_begin/sk_dontcopy_end so sk_clone_lock()
leaves this field uninitialized.

We might remove dead code populating req->sk_rx_queue_mapping
in the future.

[1]

BUG: KMSAN: uninit-value in __sk_rx_queue_set include/net/sock.h:1924 [inline]
BUG: KMSAN: uninit-value in sk_rx_queue_update include/net/sock.h:1938 [inline]
BUG: KMSAN: uninit-value in sk_mark_napi_id include/net/busy_poll.h:136 [inline]
BUG: KMSAN: uninit-value in tcp_child_process+0xb42/0x1050 net/ipv4/tcp_minisocks.c:833
 __sk_rx_queue_set include/net/sock.h:1924 [inline]
 sk_rx_queue_update include/net/sock.h:1938 [inline]
 sk_mark_napi_id include/net/busy_poll.h:136 [inline]
 tcp_child_process+0xb42/0x1050 net/ipv4/tcp_minisocks.c:833
 tcp_v4_rcv+0x3d83/0x4ed0 net/ipv4/tcp_ipv4.c:2066
 ip_protocol_deliver_rcu+0x760/0x10b0 net/ipv4/ip_input.c:204
 ip_local_deliver_finish net/ipv4/ip_input.c:231 [inline]
 NF_HOOK include/linux/netfilter.h:307 [inline]
 ip_local_deliver+0x584/0x8c0 net/ipv4/ip_input.c:252
 dst_input include/net/dst.h:460 [inline]
 ip_sublist_rcv_finish net/ipv4/ip_input.c:551 [inline]
 ip_list_rcv_finish net/ipv4/ip_input.c:601 [inline]
 ip_sublist_rcv+0x11fd/0x1520 net/ipv4/ip_input.c:609
 ip_list_rcv+0x95f/0x9a0 net/ipv4/ip_input.c:644
 __netif_receive_skb_list_ptype net/core/dev.c:5505 [inline]
 __netif_receive_skb_list_core+0xe34/0x1240 net/core/dev.c:5553
 __netif_receive_skb_list+0x7fc/0x960 net/core/dev.c:5605
 netif_receive_skb_list_internal+0x868/0xde0 net/core/dev.c:5696
 gro_normal_list net/core/dev.c:5850 [inline]
 napi_complete_done+0x579/0xdd0 net/core/dev.c:6587
 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline]
 virtnet_poll+0x17b6/0x2350 drivers/net/virtio_net.c:1557
 __napi_poll+0x14e/0xbc0 net/core/dev.c:7020
 napi_poll net/core/dev.c:7087 [inline]
 net_rx_action+0x824/0x1880 net/core/dev.c:7174
 __do_softirq+0x1fe/0x7eb kernel/softirq.c:558
 run_ksoftirqd+0x33/0x50 kernel/softirq.c:920
 smpboot_thread_fn+0x616/0xbf0 kernel/smpboot.c:164
 kthread+0x721/0x850 kernel/kthread.c:327
 ret_from_fork+0x1f/0x30

Uninit was created at:
 __alloc_pages+0xbc7/0x10a0 mm/page_alloc.c:5409
 alloc_pages+0x8a5/0xb80
 alloc_slab_page mm/slub.c:1810 [inline]
 allocate_slab+0x287/0x1c20 mm/slub.c:1947
 new_slab mm/slub.c:2010 [inline]
 ___slab_alloc+0xbdf/0x1e90 mm/slub.c:3039
 __slab_alloc mm/slub.c:3126 [inline]
 slab_alloc_node mm/slub.c:3217 [inline]
 slab_alloc mm/slub.c:3259 [inline]
 kmem_cache_alloc+0xbb3/0x11c0 mm/slub.c:3264
 sk_prot_alloc+0xeb/0x570 net/core/sock.c:1914
 sk_clone_lock+0xd6/0x1940 net/core/sock.c:2118
 inet_csk_clone_lock+0x8d/0x6a0 net/ipv4/inet_connection_sock.c:956
 tcp_create_openreq_child+0xb1/0x1ef0 net/ipv4/tcp_minisocks.c:453
 tcp_v4_syn_recv_sock+0x268/0x2710 net/ipv4/tcp_ipv4.c:1563
 tcp_check_req+0x207c/0x2a30 net/ipv4/tcp_minisocks.c:765
 tcp_v4_rcv+0x36f5/0x4ed0 net/ipv4/tcp_ipv4.c:2047
 ip_protocol_deliver_rcu+0x760/0x10b0 net/ipv4/ip_input.c:204
 ip_local_deliver_finish net/ipv4/ip_input.c:231 [inline]
 NF_HOOK include/linux/netfilter.h:307 [inline]
 ip_local_deliver+0x584/0x8c0 net/ipv4/ip_input.c:252
 dst_input include/net/dst.h:460 [inline]
 ip_sublist_rcv_finish net/ipv4/ip_input.c:551 [inline]
 ip_list_rcv_finish net/ipv4/ip_input.c:601 [inline]
 ip_sublist_rcv+0x11fd/0x1520 net/ipv4/ip_input.c:609
 ip_list_rcv+0x95f/0x9a0 net/ipv4/ip_input.c:644
 __netif_receive_skb_list_ptype net/core/dev.c:5505 [inline]
 __netif_receive_skb_list_core+0xe34/0x1240 net/core/dev.c:5553
 __netif_receive_skb_list+0x7fc/0x960 net/core/dev.c:5605
 netif_receive_skb_list_internal+0x868/0xde0 net/core/dev.c:5696
 gro_normal_list net/core/dev.c:5850 [inline]
 napi_complete_done+0x579/0xdd0 net/core/dev.c:6587
 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline]
 virtnet_poll+0x17b6/0x2350 drivers/net/virtio_net.c:1557
 __napi_poll+0x14e/0xbc0 net/core/dev.c:7020
 napi_poll net/core/dev.c:7087 [inline]
 net_rx_action+0x824/0x1880 net/core/dev.c:7174
 __do_softirq+0x1fe/0x7eb kernel/softirq.c:558

Fixes: 342159ee394d ("net: avoid dirtying sk->sk_rx_queue_mapping")
Fixes: a37a0ee4d25c ("net: avoid uninit-value from tcp_conn_request")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Tested-by: Alexander Potapenko <glider@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/busy_poll.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 7994455ec714..c4898fcbf923 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -136,6 +136,19 @@ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
 	sk_rx_queue_update(sk, skb);
 }
 
+/* Variant of sk_mark_napi_id() for passive flow setup,
+ * as sk->sk_napi_id and sk->sk_rx_queue_mapping content
+ * needs to be set.
+ */
+static inline void sk_mark_napi_id_set(struct sock *sk,
+				       const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
+#endif
+	sk_rx_queue_set(sk, skb);
+}
+
 static inline void __sk_mark_napi_id_once(struct sock *sk, unsigned int napi_id)
 {
 #ifdef CONFIG_NET_RX_BUSY_POLL
-- 
cgit v1.2.3


From dac8e00fb640e9569cdeefd3ce8a75639e5d0711 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 2 Dec 2021 18:27:18 -0800
Subject: bonding: make tx_rebalance_counter an atomic

KCSAN reported a data-race [1] around tx_rebalance_counter
which can be accessed from different contexts, without
the protection of a lock/mutex.

[1]
BUG: KCSAN: data-race in bond_alb_init_slave / bond_alb_monitor

write to 0xffff888157e8ca24 of 4 bytes by task 7075 on cpu 0:
 bond_alb_init_slave+0x713/0x860 drivers/net/bonding/bond_alb.c:1613
 bond_enslave+0xd94/0x3010 drivers/net/bonding/bond_main.c:1949
 do_set_master net/core/rtnetlink.c:2521 [inline]
 __rtnl_newlink net/core/rtnetlink.c:3475 [inline]
 rtnl_newlink+0x1298/0x13b0 net/core/rtnetlink.c:3506
 rtnetlink_rcv_msg+0x745/0x7e0 net/core/rtnetlink.c:5571
 netlink_rcv_skb+0x14e/0x250 net/netlink/af_netlink.c:2491
 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:5589
 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
 netlink_unicast+0x5fc/0x6c0 net/netlink/af_netlink.c:1345
 netlink_sendmsg+0x6e1/0x7d0 net/netlink/af_netlink.c:1916
 sock_sendmsg_nosec net/socket.c:704 [inline]
 sock_sendmsg net/socket.c:724 [inline]
 ____sys_sendmsg+0x39a/0x510 net/socket.c:2409
 ___sys_sendmsg net/socket.c:2463 [inline]
 __sys_sendmsg+0x195/0x230 net/socket.c:2492
 __do_sys_sendmsg net/socket.c:2501 [inline]
 __se_sys_sendmsg net/socket.c:2499 [inline]
 __x64_sys_sendmsg+0x42/0x50 net/socket.c:2499
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffff888157e8ca24 of 4 bytes by task 1082 on cpu 1:
 bond_alb_monitor+0x8f/0xc00 drivers/net/bonding/bond_alb.c:1511
 process_one_work+0x3fc/0x980 kernel/workqueue.c:2298
 worker_thread+0x616/0xa70 kernel/workqueue.c:2445
 kthread+0x2c7/0x2e0 kernel/kthread.c:327
 ret_from_fork+0x1f/0x30

value changed: 0x00000001 -> 0x00000064

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 1082 Comm: kworker/u4:3 Not tainted 5.16.0-rc3-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: bond1 bond_alb_monitor

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/bond_alb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h
index f6af76c87a6c..191c36afa1f4 100644
--- a/include/net/bond_alb.h
+++ b/include/net/bond_alb.h
@@ -126,7 +126,7 @@ struct tlb_slave_info {
 struct alb_bond_info {
 	struct tlb_client_info	*tx_hashtbl; /* Dynamically allocated */
 	u32			unbalanced_load;
-	int			tx_rebalance_counter;
+	atomic_t		tx_rebalance_counter;
 	int			lp_counter;
 	/* -------- rlb parameters -------- */
 	int rlb_enabled;
-- 
cgit v1.2.3


From 2c4dcd7fd57b20a21b65da04d89c38a7217d79cf Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Mon, 29 Nov 2021 14:03:07 +0100
Subject: topology/sysfs: export die attributes only if an architectures has
 support

The die_id and die_cpus topology sysfs attributes have been added with
commit 0e344d8c709f ("cpu/topology: Export die_id") and commit
2e4c54dac7b3 ("topology: Create core_cpus and die_cpus sysfs attributes").

While they are currently only used and useful for x86 they are still
present with bogus default values for all architectures. Instead of
enforcing such new sysfs attributes to all architectures, make them
only optional visible if an architecture opts in by defining both the
topology_die_id and topology_die_cpumask attributes.

This is similar to what was done when the book and drawer topology
levels were introduced: avoid useless and therefore confusing sysfs
attributes for architectures which cannot make use of them.

This should not break any existing applications, since this is a
rather new interface and applications should be able to handle also
older kernel versions without such attributes - besides that they
contain only useful information for x86.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Link: https://lore.kernel.org/r/20211129130309.3256168-2-hca@linux.ibm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/topology.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index 0b3704ad13c8..8d1bdae76230 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -180,6 +180,10 @@ static inline int cpu_to_mem(int cpu)
 
 #endif	/* [!]CONFIG_HAVE_MEMORYLESS_NODES */
 
+#if defined(topology_die_id) && defined(topology_die_cpumask)
+#define TOPOLOGY_DIE_SYSFS
+#endif
+
 #ifndef topology_physical_package_id
 #define topology_physical_package_id(cpu)	((void)(cpu), -1)
 #endif
-- 
cgit v1.2.3


From e795707703b32fecdd7467afcc33ff1e92416c05 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Mon, 29 Nov 2021 14:03:08 +0100
Subject: topology/sysfs: export cluster attributes only if an architectures
 has support

The cluster_id and cluster_cpus topology sysfs attributes have been
added with commit c5e22feffdd7 ("topology: Represent clusters of CPUs
within a die").

They are currently only used for x86, arm64, and riscv (via generic
arch topology), however they are still present with bogus default
values for all other architectures. Instead of enforcing such new
sysfs attributes to all architectures, make them only optional visible
if an architecture opts in by defining both the topology_cluster_id
and topology_cluster_cpumask attributes.

This is similar to what was done when the book and drawer topology
levels were introduced: avoid useless and therefore confusing sysfs
attributes for architectures which cannot make use of them.

This should not break any existing applications, since this is a
new interface introduced with the v5.16 merge window.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Link: https://lore.kernel.org/r/20211129130309.3256168-3-hca@linux.ibm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/topology.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index 8d1bdae76230..d52be69037db 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -183,6 +183,9 @@ static inline int cpu_to_mem(int cpu)
 #if defined(topology_die_id) && defined(topology_die_cpumask)
 #define TOPOLOGY_DIE_SYSFS
 #endif
+#if defined(topology_cluster_id) && defined(topology_cluster_cpumask)
+#define TOPOLOGY_CLUSTER_SYSFS
+#endif
 
 #ifndef topology_physical_package_id
 #define topology_physical_package_id(cpu)	((void)(cpu), -1)
-- 
cgit v1.2.3


From f1045056c726440469d89d23c13734bcd6c0d15b Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Mon, 29 Nov 2021 14:03:09 +0100
Subject: topology/sysfs: rework book and drawer topology ifdefery

Provide default defines for the topology_book_[id|cpumask] and
topology_drawer_[id|cpumask] macros just like for each other topology
level.
This way all topology levels are handled in a similar way. Still the
the book and drawer levels are only used on s390, and also the sysfs
attributes are only created on s390. However other architectures may
opt in if wanted.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Link: https://lore.kernel.org/r/20211129130309.3256168-4-hca@linux.ibm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/topology.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index d52be69037db..a6e201758ae9 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -186,6 +186,12 @@ static inline int cpu_to_mem(int cpu)
 #if defined(topology_cluster_id) && defined(topology_cluster_cpumask)
 #define TOPOLOGY_CLUSTER_SYSFS
 #endif
+#if defined(topology_book_id) && defined(topology_book_cpumask)
+#define TOPOLOGY_BOOK_SYSFS
+#endif
+#if defined(topology_drawer_id) && defined(topology_drawer_cpumask)
+#define TOPOLOGY_DRAWER_SYSFS
+#endif
 
 #ifndef topology_physical_package_id
 #define topology_physical_package_id(cpu)	((void)(cpu), -1)
@@ -199,6 +205,12 @@ static inline int cpu_to_mem(int cpu)
 #ifndef topology_core_id
 #define topology_core_id(cpu)			((void)(cpu), 0)
 #endif
+#ifndef topology_book_id
+#define topology_book_id(cpu)			((void)(cpu), -1)
+#endif
+#ifndef topology_drawer_id
+#define topology_drawer_id(cpu)			((void)(cpu), -1)
+#endif
 #ifndef topology_sibling_cpumask
 #define topology_sibling_cpumask(cpu)		cpumask_of(cpu)
 #endif
@@ -211,6 +223,12 @@ static inline int cpu_to_mem(int cpu)
 #ifndef topology_die_cpumask
 #define topology_die_cpumask(cpu)		cpumask_of(cpu)
 #endif
+#ifndef topology_book_cpumask
+#define topology_book_cpumask(cpu)		cpumask_of(cpu)
+#endif
+#ifndef topology_drawer_cpumask
+#define topology_drawer_cpumask(cpu)		cpumask_of(cpu)
+#endif
 
 #if defined(CONFIG_SCHED_SMT) && !defined(cpu_smt_mask)
 static inline const struct cpumask *cpu_smt_mask(int cpu)
-- 
cgit v1.2.3


From c7fdb2404f66131bc9c22e06f712717288826487 Mon Sep 17 00:00:00 2001
From: Abhyuday Godhasara <abhyuday.godhasara@xilinx.com>
Date: Sun, 28 Nov 2021 23:02:14 -0800
Subject: drivers: soc: xilinx: add xilinx event management driver

Xilinx event management driver provides an interface to subscribe or
unsubscribe for the event/callback supported by firmware. An agent can use
this driver to register for Error Event, Device Event and Suspend callback.
This driver only allows one agent per event to do registration. Driver will
return an error in case of multiple registration for the same event.

This driver gets notification from firmware through TF-A as SGI. During
initialization, event manager driver register handler for SGI used for
notification. It also provides SGI number info to TF-A by using
IOCTL_REGISTER_SGI call to TF-A.

After receiving notification from firmware, the driver makes an SMC call to
TF-A to get IPI data. From the IPI data provided by TF-A, event manager
identified the cause of event and forward that event/callback notification
to the respective subscribed driver. After this, in case of Error Event,
driver performs unregistration as firmware expecting from agent to do
re-registration if the agent wants to get notified on the second occurrence
of an error event.

Add new IOCTL id IOCTL_REGISTER_SGI = 25 which is used to register SGI on
TF-A.

Older firmware doesn't have all required support for event handling which
is required by the event manager driver. So add check for the register
notifier version in the event manager driver.

Xilinx event management driver provides support to subscribe for multiple
error events with the use of Event Mask in a single call of
xlnx_register_event(). Agent driver can provide 'Event' parameter value as
ORed of multiple event masks to register single callback for multiple
events. For example, to register callback for event=0x1 and event=0x2 for
the given node, agent can provide event=0x3 (0x1 | 0x2). It is not possible
to register multiple events for different nodes in a single registration
call.

Also provide support to receive multiple error events as in single
notification from firmware and then forward it to subscribed drivers via
registered callback one by one.

Acked-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Tejas Patel <tejas.patel@xilinx.com>
Signed-off-by: Rajan Vaja <rajan.vaja@xilinx.com>
Signed-off-by: Abhyuday Godhasara <abhyuday.godhasara@xilinx.com>
Link: https://lore.kernel.org/r/20211129070216.30253-2-abhyuday.godhasara@xilinx.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/firmware/xlnx-event-manager.h | 36 +++++++++++++++++++++++++++++
 include/linux/firmware/xlnx-zynqmp.h        |  2 ++
 2 files changed, 38 insertions(+)
 create mode 100644 include/linux/firmware/xlnx-event-manager.h

(limited to 'include')

diff --git a/include/linux/firmware/xlnx-event-manager.h b/include/linux/firmware/xlnx-event-manager.h
new file mode 100644
index 000000000000..3f87c4929d21
--- /dev/null
+++ b/include/linux/firmware/xlnx-event-manager.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _FIRMWARE_XLNX_EVENT_MANAGER_H_
+#define _FIRMWARE_XLNX_EVENT_MANAGER_H_
+
+#include <linux/firmware/xlnx-zynqmp.h>
+
+#define CB_MAX_PAYLOAD_SIZE	(4U) /*In payload maximum 32bytes */
+
+/************************** Exported Function *****************************/
+
+typedef void (*event_cb_func_t)(const u32 *payload, void *data);
+
+#if IS_REACHABLE(CONFIG_XLNX_EVENT_MANAGER)
+int xlnx_register_event(const enum pm_api_cb_id cb_type, const u32 node_id,
+			const u32 event, const bool wake,
+			event_cb_func_t cb_fun, void *data);
+
+int xlnx_unregister_event(const enum pm_api_cb_id cb_type, const u32 node_id,
+			  const u32 event, event_cb_func_t cb_fun);
+#else
+static inline int xlnx_register_event(const enum pm_api_cb_id cb_type, const u32 node_id,
+				      const u32 event, const bool wake,
+				      event_cb_func_t cb_fun, void *data)
+{
+	return -ENODEV;
+}
+
+static inline int xlnx_unregister_event(const enum pm_api_cb_id cb_type, const u32 node_id,
+					 const u32 event, event_cb_func_t cb_fun)
+{
+	return -ENODEV;
+}
+#endif
+
+#endif /* _FIRMWARE_XLNX_EVENT_MANAGER_H_ */
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 077e894bb340..907cb01890cf 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -141,6 +141,8 @@ enum pm_ioctl_id {
 	/* Set healthy bit value */
 	IOCTL_SET_BOOT_HEALTH_STATUS = 17,
 	IOCTL_OSPI_MUX_SELECT = 21,
+	/* Register SGI to ATF */
+	IOCTL_REGISTER_SGI = 25,
 };
 
 enum pm_query_id {
-- 
cgit v1.2.3


From 14866a7db8da1f61fb6135c461b733694eea9580 Mon Sep 17 00:00:00 2001
From: Ira Weiny <ira.weiny@intel.com>
Date: Wed, 1 Dec 2021 20:43:03 -0800
Subject: Documentation/auxiliary_bus: Add example code for
 module_auxiliary_driver()

Add an example code snipit to the module_auxiliary_driver()
documentation which is consistent with the other example code in the
elsewhere in the documentation.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/20211202044305.4006853-6-ira.weiny@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/auxiliary_bus.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h
index fc51d45f106b..605b27aab693 100644
--- a/include/linux/auxiliary_bus.h
+++ b/include/linux/auxiliary_bus.h
@@ -66,6 +66,10 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv);
  * Helper macro for auxiliary drivers which do not do anything special in
  * module init/exit. This eliminates a lot of boilerplate. Each module may only
  * use this macro once, and calling it replaces module_init() and module_exit()
+ *
+ * .. code-block:: c
+ *
+ *	module_auxiliary_driver(my_drv);
  */
 #define module_auxiliary_driver(__auxiliary_driver) \
 	module_driver(__auxiliary_driver, auxiliary_driver_register, auxiliary_driver_unregister)
-- 
cgit v1.2.3


From e1b5186810cc7d4ec60447032636b8e6772dbbc6 Mon Sep 17 00:00:00 2001
From: Ira Weiny <ira.weiny@intel.com>
Date: Wed, 1 Dec 2021 20:43:05 -0800
Subject: Documentation/auxiliary_bus: Move the text into the code

The code and documentation are more difficult to maintain when kept
separately.  This is further compounded when the standard structure
documentation infrastructure is not used.

Move the documentation into the code, use the standard documentation
infrastructure, add current documented functions, and reference the text
in the rst file.

Suggested-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/20211202044305.4006853-8-ira.weiny@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/auxiliary_bus.h | 160 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 160 insertions(+)

(limited to 'include')

diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h
index 605b27aab693..e6d8b5c16226 100644
--- a/include/linux/auxiliary_bus.h
+++ b/include/linux/auxiliary_bus.h
@@ -11,12 +11,172 @@
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
 
+/**
+ * DOC: DEVICE_LIFESPAN
+ *
+ * The registering driver is the entity that allocates memory for the
+ * auxiliary_device and registers it on the auxiliary bus.  It is important to
+ * note that, as opposed to the platform bus, the registering driver is wholly
+ * responsible for the management of the memory used for the device object.
+ *
+ * To be clear the memory for the auxiliary_device is freed in the release()
+ * callback defined by the registering driver.  The registering driver should
+ * only call auxiliary_device_delete() and then auxiliary_device_uninit() when
+ * it is done with the device.  The release() function is then automatically
+ * called if and when other code releases their reference to the devices.
+ *
+ * A parent object, defined in the shared header file, contains the
+ * auxiliary_device.  It also contains a pointer to the shared object(s), which
+ * also is defined in the shared header.  Both the parent object and the shared
+ * object(s) are allocated by the registering driver.  This layout allows the
+ * auxiliary_driver's registering module to perform a container_of() call to go
+ * from the pointer to the auxiliary_device, that is passed during the call to
+ * the auxiliary_driver's probe function, up to the parent object, and then
+ * have access to the shared object(s).
+ *
+ * The memory for the shared object(s) must have a lifespan equal to, or
+ * greater than, the lifespan of the memory for the auxiliary_device.  The
+ * auxiliary_driver should only consider that the shared object is valid as
+ * long as the auxiliary_device is still registered on the auxiliary bus.  It
+ * is up to the registering driver to manage (e.g. free or keep available) the
+ * memory for the shared object beyond the life of the auxiliary_device.
+ *
+ * The registering driver must unregister all auxiliary devices before its own
+ * driver.remove() is completed.  An easy way to ensure this is to use the
+ * devm_add_action_or_reset() call to register a function against the parent
+ * device which unregisters the auxiliary device object(s).
+ *
+ * Finally, any operations which operate on the auxiliary devices must continue
+ * to function (if only to return an error) after the registering driver
+ * unregisters the auxiliary device.
+ */
+
+/**
+ * struct auxiliary_device - auxiliary device object.
+ * @dev: Device,
+ *       The release and parent fields of the device structure must be filled
+ *       in
+ * @name: Match name found by the auxiliary device driver,
+ * @id: unique identitier if multiple devices of the same name are exported,
+ *
+ * An auxiliary_device represents a part of its parent device's functionality.
+ * It is given a name that, combined with the registering drivers
+ * KBUILD_MODNAME, creates a match_name that is used for driver binding, and an
+ * id that combined with the match_name provide a unique name to register with
+ * the bus subsystem.  For example, a driver registering an auxiliary device is
+ * named 'foo_mod.ko' and the subdevice is named 'foo_dev'.  The match name is
+ * therefore 'foo_mod.foo_dev'.
+ *
+ * Registering an auxiliary_device is a three-step process.
+ *
+ * First, a 'struct auxiliary_device' needs to be defined or allocated for each
+ * sub-device desired.  The name, id, dev.release, and dev.parent fields of
+ * this structure must be filled in as follows.
+ *
+ * The 'name' field is to be given a name that is recognized by the auxiliary
+ * driver.  If two auxiliary_devices with the same match_name, eg
+ * "foo_mod.foo_dev", are registered onto the bus, they must have unique id
+ * values (e.g. "x" and "y") so that the registered devices names are
+ * "foo_mod.foo_dev.x" and "foo_mod.foo_dev.y".  If match_name + id are not
+ * unique, then the device_add fails and generates an error message.
+ *
+ * The auxiliary_device.dev.type.release or auxiliary_device.dev.release must
+ * be populated with a non-NULL pointer to successfully register the
+ * auxiliary_device.  This release call is where resources associated with the
+ * auxiliary device must be free'ed.  Because once the device is placed on the
+ * bus the parent driver can not tell what other code may have a reference to
+ * this data.
+ *
+ * The auxiliary_device.dev.parent should be set.  Typically to the registering
+ * drivers device.
+ *
+ * Second, call auxiliary_device_init(), which checks several aspects of the
+ * auxiliary_device struct and performs a device_initialize().  After this step
+ * completes, any error state must have a call to auxiliary_device_uninit() in
+ * its resolution path.
+ *
+ * The third and final step in registering an auxiliary_device is to perform a
+ * call to auxiliary_device_add(), which sets the name of the device and adds
+ * the device to the bus.
+ *
+ * .. code-block:: c
+ *
+ *      #define MY_DEVICE_NAME "foo_dev"
+ *
+ *      ...
+ *
+ *	struct auxiliary_device *my_aux_dev = my_aux_dev_alloc(xxx);
+ *
+ *	// Step 1:
+ *	my_aux_dev->name = MY_DEVICE_NAME;
+ *	my_aux_dev->id = my_unique_id_alloc(xxx);
+ *	my_aux_dev->dev.release = my_aux_dev_release;
+ *	my_aux_dev->dev.parent = my_dev;
+ *
+ *	// Step 2:
+ *	if (auxiliary_device_init(my_aux_dev))
+ *		goto fail;
+ *
+ *	// Step 3:
+ *	if (auxiliary_device_add(my_aux_dev)) {
+ *		auxiliary_device_uninit(my_aux_dev);
+ *		goto fail;
+ *	}
+ *
+ *	...
+ *
+ *
+ * Unregistering an auxiliary_device is a two-step process to mirror the
+ * register process.  First call auxiliary_device_delete(), then call
+ * auxiliary_device_uninit().
+ *
+ * .. code-block:: c
+ *
+ *         auxiliary_device_delete(my_dev->my_aux_dev);
+ *         auxiliary_device_uninit(my_dev->my_aux_dev);
+ */
 struct auxiliary_device {
 	struct device dev;
 	const char *name;
 	u32 id;
 };
 
+/**
+ * struct auxiliary_driver - Definition of an auxiliary bus driver
+ * @probe: Called when a matching device is added to the bus.
+ * @remove: Called when device is removed from the bus.
+ * @shutdown: Called at shut-down time to quiesce the device.
+ * @suspend: Called to put the device to sleep mode. Usually to a power state.
+ * @resume: Called to bring a device from sleep mode.
+ * @name: Driver name.
+ * @driver: Core driver structure.
+ * @id_table: Table of devices this driver should match on the bus.
+ *
+ * Auxiliary drivers follow the standard driver model convention, where
+ * discovery/enumeration is handled by the core, and drivers provide probe()
+ * and remove() methods. They support power management and shutdown
+ * notifications using the standard conventions.
+ *
+ * Auxiliary drivers register themselves with the bus by calling
+ * auxiliary_driver_register(). The id_table contains the match_names of
+ * auxiliary devices that a driver can bind with.
+ *
+ * .. code-block:: c
+ *
+ *         static const struct auxiliary_device_id my_auxiliary_id_table[] = {
+ *		   { .name = "foo_mod.foo_dev" },
+ *                 {},
+ *         };
+ *
+ *         MODULE_DEVICE_TABLE(auxiliary, my_auxiliary_id_table);
+ *
+ *         struct auxiliary_driver my_drv = {
+ *                 .name = "myauxiliarydrv",
+ *                 .id_table = my_auxiliary_id_table,
+ *                 .probe = my_drv_probe,
+ *                 .remove = my_drv_remove
+ *         };
+ */
 struct auxiliary_driver {
 	int (*probe)(struct auxiliary_device *auxdev, const struct auxiliary_device_id *id);
 	void (*remove)(struct auxiliary_device *auxdev);
-- 
cgit v1.2.3


From bb49e9e730c2906a958eee273a7819f401543d6c Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Fri, 3 Dec 2021 12:16:58 +0100
Subject: fs: add is_idmapped_mnt() helper

Multiple places open-code the same check to determine whether a given
mount is idmapped. Introduce a simple helper function that can be used
instead. This allows us to get rid of the fragile open-coding. We will
later change the check that is used to determine whether a given mount
is idmapped. Introducing a helper allows us to do this in a single
place instead of doing it for multiple places.

Link: https://lore.kernel.org/r/20211123114227.3124056-2-brauner@kernel.org (v1)
Link: https://lore.kernel.org/r/20211130121032.3753852-2-brauner@kernel.org (v2)
Link: https://lore.kernel.org/r/20211203111707.3901969-2-brauner@kernel.org
Cc: Seth Forshee <sforshee@digitalocean.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
CC: linux-fsdevel@vger.kernel.org
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Seth Forshee <sforshee@digitalocean.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 include/linux/fs.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index bbf812ce89a8..06cbefd76de7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2724,6 +2724,20 @@ static inline struct user_namespace *file_mnt_user_ns(struct file *file)
 {
 	return mnt_user_ns(file->f_path.mnt);
 }
+
+/**
+ * is_idmapped_mnt - check whether a mount is mapped
+ * @mnt: the mount to check
+ *
+ * If @mnt has an idmapping attached to it @mnt is mapped.
+ *
+ * Return: true if mount is mapped, false if not.
+ */
+static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
+{
+	return mnt_user_ns(mnt) != &init_user_ns;
+}
+
 extern long vfs_truncate(const struct path *, loff_t);
 int do_truncate(struct user_namespace *, struct dentry *, loff_t start,
 		unsigned int time_attrs, struct file *filp);
-- 
cgit v1.2.3


From a793d79ea3e041081cd7cbd8ee43d0b5e4914a2b Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Fri, 3 Dec 2021 12:16:59 +0100
Subject: fs: move mapping helpers

The low-level mapping helpers were so far crammed into fs.h. They are
out of place there. The fs.h header should just contain the higher-level
mapping helpers that interact directly with vfs objects such as struct
super_block or struct inode and not the bare mapping helpers. Similarly,
only vfs and specific fs code shall interact with low-level mapping
helpers. And so they won't be made accessible automatically through
regular {g,u}id helpers.

Link: https://lore.kernel.org/r/20211123114227.3124056-3-brauner@kernel.org (v1)
Link: https://lore.kernel.org/r/20211130121032.3753852-3-brauner@kernel.org (v2)
Link: https://lore.kernel.org/r/20211203111707.3901969-3-brauner@kernel.org
Cc: Seth Forshee <sforshee@digitalocean.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
CC: linux-fsdevel@vger.kernel.org
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Seth Forshee <sforshee@digitalocean.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 include/linux/fs.h            |  91 +------------------------------------
 include/linux/mnt_idmapping.h | 101 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+), 90 deletions(-)
 create mode 100644 include/linux/mnt_idmapping.h

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 06cbefd76de7..b3bcb2129699 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -41,6 +41,7 @@
 #include <linux/stddef.h>
 #include <linux/mount.h>
 #include <linux/cred.h>
+#include <linux/mnt_idmapping.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -1624,34 +1625,6 @@ static inline void i_gid_write(struct inode *inode, gid_t gid)
 	inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid);
 }
 
-/**
- * kuid_into_mnt - map a kuid down into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- * @kuid: kuid to be mapped
- *
- * Return: @kuid mapped according to @mnt_userns.
- * If @kuid has no mapping INVALID_UID is returned.
- */
-static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns,
-				   kuid_t kuid)
-{
-	return make_kuid(mnt_userns, __kuid_val(kuid));
-}
-
-/**
- * kgid_into_mnt - map a kgid down into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- * @kgid: kgid to be mapped
- *
- * Return: @kgid mapped according to @mnt_userns.
- * If @kgid has no mapping INVALID_GID is returned.
- */
-static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns,
-				   kgid_t kgid)
-{
-	return make_kgid(mnt_userns, __kgid_val(kgid));
-}
-
 /**
  * i_uid_into_mnt - map an inode's i_uid down into a mnt_userns
  * @mnt_userns: user namespace of the mount the inode was found from
@@ -1680,68 +1653,6 @@ static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
 	return kgid_into_mnt(mnt_userns, inode->i_gid);
 }
 
-/**
- * kuid_from_mnt - map a kuid up into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- * @kuid: kuid to be mapped
- *
- * Return: @kuid mapped up according to @mnt_userns.
- * If @kuid has no mapping INVALID_UID is returned.
- */
-static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns,
-				   kuid_t kuid)
-{
-	return KUIDT_INIT(from_kuid(mnt_userns, kuid));
-}
-
-/**
- * kgid_from_mnt - map a kgid up into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- * @kgid: kgid to be mapped
- *
- * Return: @kgid mapped up according to @mnt_userns.
- * If @kgid has no mapping INVALID_GID is returned.
- */
-static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns,
-				   kgid_t kgid)
-{
-	return KGIDT_INIT(from_kgid(mnt_userns, kgid));
-}
-
-/**
- * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- *
- * Use this helper to initialize a new vfs or filesystem object based on
- * the caller's fsuid. A common example is initializing the i_uid field of
- * a newly allocated inode triggered by a creation event such as mkdir or
- * O_CREAT. Other examples include the allocation of quotas for a specific
- * user.
- *
- * Return: the caller's current fsuid mapped up according to @mnt_userns.
- */
-static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns)
-{
-	return kuid_from_mnt(mnt_userns, current_fsuid());
-}
-
-/**
- * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- *
- * Use this helper to initialize a new vfs or filesystem object based on
- * the caller's fsgid. A common example is initializing the i_gid field of
- * a newly allocated inode triggered by a creation event such as mkdir or
- * O_CREAT. Other examples include the allocation of quotas for a specific
- * user.
- *
- * Return: the caller's current fsgid mapped up according to @mnt_userns.
- */
-static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns)
-{
-	return kgid_from_mnt(mnt_userns, current_fsgid());
-}
-
 /**
  * inode_fsuid_set - initialize inode's i_uid field with callers fsuid
  * @inode: inode to initialize
diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
new file mode 100644
index 000000000000..47c7811fadfe
--- /dev/null
+++ b/include/linux/mnt_idmapping.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MNT_IDMAPPING_H
+#define _LINUX_MNT_IDMAPPING_H
+
+#include <linux/types.h>
+#include <linux/uidgid.h>
+
+struct user_namespace;
+extern struct user_namespace init_user_ns;
+
+/**
+ * kuid_into_mnt - map a kuid down into a mnt_userns
+ * @mnt_userns: user namespace of the relevant mount
+ * @kuid: kuid to be mapped
+ *
+ * Return: @kuid mapped according to @mnt_userns.
+ * If @kuid has no mapping INVALID_UID is returned.
+ */
+static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns,
+				   kuid_t kuid)
+{
+	return make_kuid(mnt_userns, __kuid_val(kuid));
+}
+
+/**
+ * kgid_into_mnt - map a kgid down into a mnt_userns
+ * @mnt_userns: user namespace of the relevant mount
+ * @kgid: kgid to be mapped
+ *
+ * Return: @kgid mapped according to @mnt_userns.
+ * If @kgid has no mapping INVALID_GID is returned.
+ */
+static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns,
+				   kgid_t kgid)
+{
+	return make_kgid(mnt_userns, __kgid_val(kgid));
+}
+
+/**
+ * kuid_from_mnt - map a kuid up into a mnt_userns
+ * @mnt_userns: user namespace of the relevant mount
+ * @kuid: kuid to be mapped
+ *
+ * Return: @kuid mapped up according to @mnt_userns.
+ * If @kuid has no mapping INVALID_UID is returned.
+ */
+static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns,
+				   kuid_t kuid)
+{
+	return KUIDT_INIT(from_kuid(mnt_userns, kuid));
+}
+
+/**
+ * kgid_from_mnt - map a kgid up into a mnt_userns
+ * @mnt_userns: user namespace of the relevant mount
+ * @kgid: kgid to be mapped
+ *
+ * Return: @kgid mapped up according to @mnt_userns.
+ * If @kgid has no mapping INVALID_GID is returned.
+ */
+static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns,
+				   kgid_t kgid)
+{
+	return KGIDT_INIT(from_kgid(mnt_userns, kgid));
+}
+
+/**
+ * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns
+ * @mnt_userns: user namespace of the relevant mount
+ *
+ * Use this helper to initialize a new vfs or filesystem object based on
+ * the caller's fsuid. A common example is initializing the i_uid field of
+ * a newly allocated inode triggered by a creation event such as mkdir or
+ * O_CREAT. Other examples include the allocation of quotas for a specific
+ * user.
+ *
+ * Return: the caller's current fsuid mapped up according to @mnt_userns.
+ */
+static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns)
+{
+	return kuid_from_mnt(mnt_userns, current_fsuid());
+}
+
+/**
+ * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns
+ * @mnt_userns: user namespace of the relevant mount
+ *
+ * Use this helper to initialize a new vfs or filesystem object based on
+ * the caller's fsgid. A common example is initializing the i_gid field of
+ * a newly allocated inode triggered by a creation event such as mkdir or
+ * O_CREAT. Other examples include the allocation of quotas for a specific
+ * user.
+ *
+ * Return: the caller's current fsgid mapped up according to @mnt_userns.
+ */
+static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns)
+{
+	return kgid_from_mnt(mnt_userns, current_fsgid());
+}
+
+#endif /* _LINUX_MNT_IDMAPPING_H */
-- 
cgit v1.2.3


From 476860b3eb4a50958243158861d5340066df5af2 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Fri, 3 Dec 2021 12:17:00 +0100
Subject: fs: tweak fsuidgid_has_mapping()

If the caller's fs{g,u}id aren't mapped in the mount's idmapping we can
return early and skip the check whether the mapped fs{g,u}id also have a
mapping in the filesystem's idmapping. If the fs{g,u}id aren't mapped in
the mount's idmapping they consequently can't be mapped in the
filesystem's idmapping. So there's no point in checking that.

Link: https://lore.kernel.org/r/20211123114227.3124056-4-brauner@kernel.org (v1)
Link: https://lore.kernel.org/r/20211130121032.3753852-4-brauner@kernel.org (v2)
Link: https://lore.kernel.org/r/20211203111707.3901969-4-brauner@kernel.org
Cc: Seth Forshee <sforshee@digitalocean.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
CC: linux-fsdevel@vger.kernel.org
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Seth Forshee <sforshee@digitalocean.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 include/linux/fs.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index b3bcb2129699..db5ee15e36b1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1695,10 +1695,18 @@ static inline void inode_fsgid_set(struct inode *inode,
 static inline bool fsuidgid_has_mapping(struct super_block *sb,
 					struct user_namespace *mnt_userns)
 {
-	struct user_namespace *s_user_ns = sb->s_user_ns;
+	struct user_namespace *fs_userns = sb->s_user_ns;
+	kuid_t kuid;
+	kgid_t kgid;
 
-	return kuid_has_mapping(s_user_ns, mapped_fsuid(mnt_userns)) &&
-	       kgid_has_mapping(s_user_ns, mapped_fsgid(mnt_userns));
+	kuid = mapped_fsuid(mnt_userns);
+	if (!uid_valid(kuid))
+		return false;
+	kgid = mapped_fsgid(mnt_userns);
+	if (!gid_valid(kgid))
+		return false;
+	return kuid_has_mapping(fs_userns, kuid) &&
+	       kgid_has_mapping(fs_userns, kgid);
 }
 
 extern struct timespec64 current_time(struct inode *inode);
-- 
cgit v1.2.3


From 1ac2a4104968e0a60b4b3572216a92aab5c1b025 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Fri, 3 Dec 2021 12:17:01 +0100
Subject: fs: account for filesystem mappings

Currently we only support idmapped mounts for filesystems mounted
without an idmapping. This was a conscious decision mentioned in
multiple places (cf. e.g. [1]).

As explained at length in [3] it is perfectly fine to extend support for
idmapped mounts to filesystem's mounted with an idmapping should the
need arise. The need has been there for some time now. Various container
projects in userspace need this to run unprivileged and nested
unprivileged containers (cf. [2]).

Before we can port any filesystem that is mountable with an idmapping to
support idmapped mounts we need to first extend the mapping helpers to
account for the filesystem's idmapping. This again, is explained at
length in our documentation at [3] but I'll give an overview here again.

Currently, the low-level mapping helpers implement the remapping
algorithms described in [3] in a simplified manner. Because we could
rely on the fact that all filesystems supporting idmapped mounts are
mounted without an idmapping the translation step from or into the
filesystem idmapping could be skipped.

In order to support idmapped mounts of filesystem's mountable with an
idmapping the translation step we were able to skip before cannot be
skipped anymore. A filesystem mounted with an idmapping is very likely
to not use an identity mapping and will instead use a non-identity
mapping. So the translation step from or into the filesystem's idmapping
in the remapping algorithm cannot be skipped for such filesystems. More
details with examples can be found in [3].

This patch adds a few new and prepares some already existing low-level
mapping helpers to perform the full translation algorithm explained in
[3]. The low-level helpers can be written in a way that they only
perform the additional translation step when the filesystem is indeed
mounted with an idmapping.

If the low-level helpers detect that they are not dealing with an
idmapped mount they can simply return the relevant k{g,u}id unchanged;
no remapping needs to be performed at all. The no_idmapping() helper
detects whether the shortcut can be used.

If the low-level helpers detected that they are dealing with an idmapped
mount but the underlying filesystem is mounted without an idmapping we
can rely on the previous shorcut and can continue to skip the
translation step from or into the filesystem's idmapping.

These checks guarantee that only the minimal amount of work is
performed. As before, if idmapped mounts aren't used the low-level
helpers are idempotent and no work is performed at all.

This patch adds the helpers mapped_k{g,u}id_fs() and
mapped_k{g,u}id_user(). Following patches will port all places to
replace the old k{g,u}id_into_mnt() and k{g,u}id_from_mnt() with these
two new helpers. After the conversion is done k{g,u}id_into_mnt() and
k{g,u}id_from_mnt() will be removed. This also concludes the renaming of
the mapping helpers we started in [4]. Now, all mapping helpers will
started with the "mapped_" prefix making everything nice and consistent.

The mapped_k{g,u}id_fs() helpers replace the k{g,u}id_into_mnt()
helpers. They are to be used when k{g,u}ids are to be mapped from the
vfs, e.g. from from struct inode's i_{g,u}id.  Conversely, the
mapped_k{g,u}id_user() helpers replace the k{g,u}id_from_mnt() helpers.
They are to be used when k{g,u}ids are to be written to disk, e.g. when
entering from a system call to change ownership of a file.

This patch only introduces the helpers. It doesn't yet convert the
relevant places to account for filesystem mounted with an idmapping.

[1]: commit 2ca4dcc4909d ("fs/mount_setattr: tighten permission checks")
[2]: https://github.com/containers/podman/issues/10374
[3]: Documentations/filesystems/idmappings.rst
[4]: commit a65e58e791a1 ("fs: document and rename fsid helpers")

Link: https://lore.kernel.org/r/20211123114227.3124056-5-brauner@kernel.org (v1)
Link: https://lore.kernel.org/r/20211130121032.3753852-5-brauner@kernel.org (v2)
Link: https://lore.kernel.org/r/20211203111707.3901969-5-brauner@kernel.org
Cc: Seth Forshee <sforshee@digitalocean.com>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
CC: linux-fsdevel@vger.kernel.org
Reviewed-by: Seth Forshee <sforshee@digitalocean.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 include/linux/fs.h            |   4 +-
 include/linux/mnt_idmapping.h | 193 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 191 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index db5ee15e36b1..57aee6ebba72 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1636,7 +1636,7 @@ static inline void i_gid_write(struct inode *inode, gid_t gid)
 static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
 				    const struct inode *inode)
 {
-	return kuid_into_mnt(mnt_userns, inode->i_uid);
+	return mapped_kuid_fs(mnt_userns, &init_user_ns, inode->i_uid);
 }
 
 /**
@@ -1650,7 +1650,7 @@ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
 static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
 				    const struct inode *inode)
 {
-	return kgid_into_mnt(mnt_userns, inode->i_gid);
+	return mapped_kgid_fs(mnt_userns, &init_user_ns, inode->i_gid);
 }
 
 /**
diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
index 47c7811fadfe..60341cd33ccc 100644
--- a/include/linux/mnt_idmapping.h
+++ b/include/linux/mnt_idmapping.h
@@ -6,6 +6,11 @@
 #include <linux/uidgid.h>
 
 struct user_namespace;
+/*
+ * Carries the initial idmapping of 0:0:4294967295 which is an identity
+ * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is
+ * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...].
+ */
 extern struct user_namespace init_user_ns;
 
 /**
@@ -64,9 +69,189 @@ static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns,
 	return KGIDT_INIT(from_kgid(mnt_userns, kgid));
 }
 
+/**
+ * initial_idmapping - check whether this is the initial mapping
+ * @ns: idmapping to check
+ *
+ * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1,
+ * [...], 1000 to 1000 [...].
+ *
+ * Return: true if this is the initial mapping, false if not.
+ */
+static inline bool initial_idmapping(const struct user_namespace *ns)
+{
+	return ns == &init_user_ns;
+}
+
+/**
+ * no_idmapping - check whether we can skip remapping a kuid/gid
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ *
+ * This function can be used to check whether a remapping between two
+ * idmappings is required.
+ * An idmapped mount is a mount that has an idmapping attached to it that
+ * is different from the filsystem's idmapping and the initial idmapping.
+ * If the initial mapping is used or the idmapping of the mount and the
+ * filesystem are identical no remapping is required.
+ *
+ * Return: true if remapping can be skipped, false if not.
+ */
+static inline bool no_idmapping(const struct user_namespace *mnt_userns,
+				const struct user_namespace *fs_userns)
+{
+	return initial_idmapping(mnt_userns) || mnt_userns == fs_userns;
+}
+
+/**
+ * mapped_kuid_fs - map a filesystem kuid into a mnt_userns
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @kuid : kuid to be mapped
+ *
+ * Take a @kuid and remap it from @fs_userns into @mnt_userns. Use this
+ * function when preparing a @kuid to be reported to userspace.
+ *
+ * If no_idmapping() determines that this is not an idmapped mount we can
+ * simply return @kuid unchanged.
+ * If initial_idmapping() tells us that the filesystem is not mounted with an
+ * idmapping we know the value of @kuid won't change when calling
+ * from_kuid() so we can simply retrieve the value via __kuid_val()
+ * directly.
+ *
+ * Return: @kuid mapped according to @mnt_userns.
+ * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is
+ * returned.
+ */
+static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns,
+				    struct user_namespace *fs_userns,
+				    kuid_t kuid)
+{
+	uid_t uid;
+
+	if (no_idmapping(mnt_userns, fs_userns))
+		return kuid;
+	if (initial_idmapping(fs_userns))
+		uid = __kuid_val(kuid);
+	else
+		uid = from_kuid(fs_userns, kuid);
+	if (uid == (uid_t)-1)
+		return INVALID_UID;
+	return make_kuid(mnt_userns, uid);
+}
+
+/**
+ * mapped_kgid_fs - map a filesystem kgid into a mnt_userns
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @kgid : kgid to be mapped
+ *
+ * Take a @kgid and remap it from @fs_userns into @mnt_userns. Use this
+ * function when preparing a @kgid to be reported to userspace.
+ *
+ * If no_idmapping() determines that this is not an idmapped mount we can
+ * simply return @kgid unchanged.
+ * If initial_idmapping() tells us that the filesystem is not mounted with an
+ * idmapping we know the value of @kgid won't change when calling
+ * from_kgid() so we can simply retrieve the value via __kgid_val()
+ * directly.
+ *
+ * Return: @kgid mapped according to @mnt_userns.
+ * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is
+ * returned.
+ */
+static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns,
+				    struct user_namespace *fs_userns,
+				    kgid_t kgid)
+{
+	gid_t gid;
+
+	if (no_idmapping(mnt_userns, fs_userns))
+		return kgid;
+	if (initial_idmapping(fs_userns))
+		gid = __kgid_val(kgid);
+	else
+		gid = from_kgid(fs_userns, kgid);
+	if (gid == (gid_t)-1)
+		return INVALID_GID;
+	return make_kgid(mnt_userns, gid);
+}
+
+/**
+ * mapped_kuid_user - map a user kuid into a mnt_userns
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @kuid : kuid to be mapped
+ *
+ * Use the idmapping of @mnt_userns to remap a @kuid into @fs_userns. Use this
+ * function when preparing a @kuid to be written to disk or inode.
+ *
+ * If no_idmapping() determines that this is not an idmapped mount we can
+ * simply return @kuid unchanged.
+ * If initial_idmapping() tells us that the filesystem is not mounted with an
+ * idmapping we know the value of @kuid won't change when calling
+ * make_kuid() so we can simply retrieve the value via KUIDT_INIT()
+ * directly.
+ *
+ * Return: @kuid mapped according to @mnt_userns.
+ * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is
+ * returned.
+ */
+static inline kuid_t mapped_kuid_user(struct user_namespace *mnt_userns,
+				      struct user_namespace *fs_userns,
+				      kuid_t kuid)
+{
+	uid_t uid;
+
+	if (no_idmapping(mnt_userns, fs_userns))
+		return kuid;
+	uid = from_kuid(mnt_userns, kuid);
+	if (uid == (uid_t)-1)
+		return INVALID_UID;
+	if (initial_idmapping(fs_userns))
+		return KUIDT_INIT(uid);
+	return make_kuid(fs_userns, uid);
+}
+
+/**
+ * mapped_kgid_user - map a user kgid into a mnt_userns
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @kgid : kgid to be mapped
+ *
+ * Use the idmapping of @mnt_userns to remap a @kgid into @fs_userns. Use this
+ * function when preparing a @kgid to be written to disk or inode.
+ *
+ * If no_idmapping() determines that this is not an idmapped mount we can
+ * simply return @kgid unchanged.
+ * If initial_idmapping() tells us that the filesystem is not mounted with an
+ * idmapping we know the value of @kgid won't change when calling
+ * make_kgid() so we can simply retrieve the value via KGIDT_INIT()
+ * directly.
+ *
+ * Return: @kgid mapped according to @mnt_userns.
+ * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is
+ * returned.
+ */
+static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns,
+				      struct user_namespace *fs_userns,
+				      kgid_t kgid)
+{
+	gid_t gid;
+
+	if (no_idmapping(mnt_userns, fs_userns))
+		return kgid;
+	gid = from_kgid(mnt_userns, kgid);
+	if (gid == (gid_t)-1)
+		return INVALID_GID;
+	if (initial_idmapping(fs_userns))
+		return KGIDT_INIT(gid);
+	return make_kgid(fs_userns, gid);
+}
+
 /**
  * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
+ * @mnt_userns: the mount's idmapping
  *
  * Use this helper to initialize a new vfs or filesystem object based on
  * the caller's fsuid. A common example is initializing the i_uid field of
@@ -78,12 +263,12 @@ static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns,
  */
 static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns)
 {
-	return kuid_from_mnt(mnt_userns, current_fsuid());
+	return mapped_kuid_user(mnt_userns, &init_user_ns, current_fsuid());
 }
 
 /**
  * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
+ * @mnt_userns: the mount's idmapping
  *
  * Use this helper to initialize a new vfs or filesystem object based on
  * the caller's fsgid. A common example is initializing the i_gid field of
@@ -95,7 +280,7 @@ static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns)
  */
 static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns)
 {
-	return kgid_from_mnt(mnt_userns, current_fsgid());
+	return mapped_kgid_user(mnt_userns, &init_user_ns, current_fsgid());
 }
 
 #endif /* _LINUX_MNT_IDMAPPING_H */
-- 
cgit v1.2.3


From 914b08b330d6722ed081e14580aae6fe66cd5946 Mon Sep 17 00:00:00 2001
From: Benjamin Berg <bberg@redhat.com>
Date: Fri, 3 Dec 2021 15:59:00 +0100
Subject: Bluetooth: Add hci_cmd_sync_cancel to public API

After transfer errors it makes sense to cancel an ongoing synchronous
command that cannot complete anymore. To permit this, export the old
hci_req_sync_cancel function as hci_cmd_sync_cancel in the API.

Signed-off-by: Benjamin Berg <bberg@redhat.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci_sync.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 0336c1bc5d25..f4034bf8f1ce 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -37,6 +37,7 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
 
 void hci_cmd_sync_init(struct hci_dev *hdev);
 void hci_cmd_sync_clear(struct hci_dev *hdev);
+void hci_cmd_sync_cancel(struct hci_dev *hdev, int err);
 
 int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
 		       void *data, hci_cmd_sync_work_destroy_t destroy);
-- 
cgit v1.2.3


From 8581fd402a0cf80b5298e3b225e7a7bd8f110e69 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 2 Dec 2021 12:34:00 -0800
Subject: treewide: Add missing includes masked by cgroup -> bpf dependency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

cgroup.h (therefore swap.h, therefore half of the universe)
includes bpf.h which in turn includes module.h and slab.h.
Since we're about to get rid of that dependency we need
to clean things up.

v2: drop the cpu.h include from cacheinfo.h, it's not necessary
and it makes riscv sensitive to ordering of include files.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Krzysztof Wilczyński <kw@linux.com>
Acked-by: Peter Chen <peter.chen@kernel.org>
Acked-by: SeongJae Park <sj@kernel.org>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/all/20211120035253.72074-1-kuba@kernel.org/  # v1
Link: https://lore.kernel.org/all/20211120165528.197359-1-kuba@kernel.org/ # cacheinfo discussion
Link: https://lore.kernel.org/bpf/20211202203400.1208663-1-kuba@kernel.org
---
 include/linux/cacheinfo.h     | 1 -
 include/linux/device/driver.h | 1 +
 include/linux/filter.h        | 2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 2f909ed084c6..4ff37cb763ae 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -3,7 +3,6 @@
 #define _LINUX_CACHEINFO_H
 
 #include <linux/bitops.h>
-#include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/smp.h>
 
diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h
index a498ebcf4993..15e7c5e15d62 100644
--- a/include/linux/device/driver.h
+++ b/include/linux/device/driver.h
@@ -18,6 +18,7 @@
 #include <linux/klist.h>
 #include <linux/pm.h>
 #include <linux/device/bus.h>
+#include <linux/module.h>
 
 /**
  * enum probe_type - device driver probe type to try
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 534f678ca50f..7f1e88e3e2b5 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -6,6 +6,7 @@
 #define __LINUX_FILTER_H__
 
 #include <linux/atomic.h>
+#include <linux/bpf.h>
 #include <linux/refcount.h>
 #include <linux/compat.h>
 #include <linux/skbuff.h>
@@ -26,7 +27,6 @@
 
 #include <asm/byteorder.h>
 #include <uapi/linux/filter.h>
-#include <uapi/linux/bpf.h>
 
 struct sk_buff;
 struct sock;
-- 
cgit v1.2.3


From 4bdcd1dd4d2f973b1a89fb20ba720d879e9e506b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 28 Oct 2021 08:47:05 -0600
Subject: mm: move filemap_range_needs_writeback() into header

No functional changes in this patch, just in preparation for efficiently
calling this light function from the block O_DIRECT handling.

Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/fs.h      |  2 --
 include/linux/pagemap.h | 29 +++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index bbf812ce89a8..6b8dc1a78df6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2847,8 +2847,6 @@ static inline int filemap_fdatawait(struct address_space *mapping)
 
 extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
 				  loff_t lend);
-extern bool filemap_range_needs_writeback(struct address_space *,
-					  loff_t lstart, loff_t lend);
 extern int filemap_write_and_wait_range(struct address_space *mapping,
 				        loff_t lstart, loff_t lend);
 extern int __filemap_fdatawrite_range(struct address_space *mapping,
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 605246452305..274a0710f2c5 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -963,6 +963,35 @@ static inline int add_to_page_cache(struct page *page,
 int __filemap_add_folio(struct address_space *mapping, struct folio *folio,
 		pgoff_t index, gfp_t gfp, void **shadowp);
 
+bool filemap_range_has_writeback(struct address_space *mapping,
+				 loff_t start_byte, loff_t end_byte);
+
+/**
+ * filemap_range_needs_writeback - check if range potentially needs writeback
+ * @mapping:           address space within which to check
+ * @start_byte:        offset in bytes where the range starts
+ * @end_byte:          offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one page in the range supplied, usually used to check if
+ * direct writing in this range will trigger a writeback. Used by O_DIRECT
+ * read/write with IOCB_NOWAIT, to see if the caller needs to do
+ * filemap_write_and_wait_range() before proceeding.
+ *
+ * Return: %true if the caller should do filemap_write_and_wait_range() before
+ * doing O_DIRECT to a page in this range, %false otherwise.
+ */
+static inline bool filemap_range_needs_writeback(struct address_space *mapping,
+						 loff_t start_byte,
+						 loff_t end_byte)
+{
+	if (!mapping->nrpages)
+		return false;
+	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
+	    !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
+		return false;
+	return filemap_range_has_writeback(mapping, start_byte, end_byte);
+}
+
 /**
  * struct readahead_control - Describes a readahead request.
  *
-- 
cgit v1.2.3


From 0a467d0fdd9594fbb449ebc93852533332c528fd Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 14 Oct 2021 14:39:59 -0600
Subject: block: switch to atomic_t for request references

refcount_t is not as expensive as it used to be, but it's still more
expensive than the io_uring method of using atomic_t and just checking
for potential over/underflow.

This borrows that same implementation, which in turn is based on the
mm implementation from Linus.

Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 1b87b7c8bbff..561beb5be7ec 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -139,7 +139,7 @@ struct request {
 	unsigned short ioprio;
 
 	enum mq_rq_state state;
-	refcount_t ref;
+	atomic_t ref;
 
 	unsigned long deadline;
 
-- 
cgit v1.2.3


From 704b914f15fb7daaf517e3acc4bed472b50ca19e Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 3 Dec 2021 21:15:32 +0800
Subject: blk-mq: move srcu from blk_mq_hw_ctx to request_queue

In case of BLK_MQ_F_BLOCKING, per-hctx srcu is used to protect dispatch
critical area. However, this srcu instance stays at the end of hctx, and
it often takes standalone cacheline, often cold.

Inside srcu_read_lock() and srcu_read_unlock(), WRITE is always done on
the indirect percpu variable which is allocated from heap instead of
being embedded, srcu->srcu_idx is read only in srcu_read_lock(). It
doesn't matter if srcu structure stays in hctx or request queue.

So switch to per-request-queue srcu for protecting dispatch, and this
way simplifies quiesce a lot, not mention quiesce is always done on the
request queue wide.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20211203131534.3668411-3-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 8 --------
 include/linux/blkdev.h | 9 +++++++++
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 561beb5be7ec..ecdc049b52fa 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -4,7 +4,6 @@
 
 #include <linux/blkdev.h>
 #include <linux/sbitmap.h>
-#include <linux/srcu.h>
 #include <linux/lockdep.h>
 #include <linux/scatterlist.h>
 #include <linux/prefetch.h>
@@ -375,13 +374,6 @@ struct blk_mq_hw_ctx {
 	 * q->unused_hctx_list.
 	 */
 	struct list_head	hctx_list;
-
-	/**
-	 * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is
-	 * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also
-	 * blk_mq_hw_ctx_size().
-	 */
-	struct srcu_struct	srcu[];
 };
 
 /**
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0a4416ef4fbf..c80cfaefc0a8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -16,6 +16,7 @@
 #include <linux/percpu-refcount.h>
 #include <linux/blkzoned.h>
 #include <linux/sbitmap.h>
+#include <linux/srcu.h>
 
 struct module;
 struct request_queue;
@@ -373,11 +374,18 @@ struct request_queue {
 	 * devices that do not have multiple independent access ranges.
 	 */
 	struct blk_independent_access_ranges *ia_ranges;
+
+	/**
+	 * @srcu: Sleepable RCU. Use as lock when type of the request queue
+	 * is blocking (BLK_MQ_F_BLOCKING). Must be the last member
+	 */
+	struct srcu_struct	srcu[];
 };
 
 /* Keep blk_queue_flag_name[] in sync with the definitions below */
 #define QUEUE_FLAG_STOPPED	0	/* queue is stopped */
 #define QUEUE_FLAG_DYING	1	/* queue being torn down */
+#define QUEUE_FLAG_HAS_SRCU	2	/* SRCU is allocated */
 #define QUEUE_FLAG_NOMERGES     3	/* disable merge attempts */
 #define QUEUE_FLAG_SAME_COMP	4	/* complete on same CPU-group */
 #define QUEUE_FLAG_FAIL_IO	5	/* fake timeout */
@@ -415,6 +423,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_dying(q)	test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
+#define blk_queue_has_srcu(q)	test_bit(QUEUE_FLAG_HAS_SRCU, &(q)->queue_flags)
 #define blk_queue_dead(q)	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
 #define blk_queue_init_done(q)	test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
-- 
cgit v1.2.3


From 0cc3a8017900f856f9bf4fdc41c2b5cb1670aabe Mon Sep 17 00:00:00 2001
From: Manish Chopra <manishc@marvell.com>
Date: Thu, 2 Dec 2021 13:01:56 -0800
Subject: qed*: enhance tx timeout debug info

This patch add some new qed APIs to query status block
info and report various data to MFW on tx timeout event

Along with that it enhances qede to dump more debug logs
(not just specific to the queue which was reported by stack)
on tx timeout which includes various other basic metadata about
all tx queues and other info (like status block etc.)

Signed-off-by: Manish Chopra <manishc@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Alok Prasad <palok@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/qed/qed_if.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 0dae7fcc5ef2..9f4bfa2a4829 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -807,6 +807,12 @@ struct qed_devlink {
 	struct devlink_health_reporter *fw_reporter;
 };
 
+struct qed_sb_info_dbg {
+	u32 igu_prod;
+	u32 igu_cons;
+	u16 pi[PIS_PER_SB];
+};
+
 struct qed_common_cb_ops {
 	void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc);
 	void (*link_update)(void *dev, struct qed_link_output *link);
@@ -1194,6 +1200,11 @@ struct qed_common_ops {
 	struct devlink* (*devlink_register)(struct qed_dev *cdev);
 
 	void (*devlink_unregister)(struct devlink *devlink);
+
+	__printf(2, 3) void (*mfw_report)(struct qed_dev *cdev, char *fmt, ...);
+
+	int (*get_sb_info)(struct qed_dev *cdev, struct qed_sb_info *sb,
+			   u16 qid, struct qed_sb_info_dbg *sb_dbg);
 };
 
 #define MASK_FIELD(_name, _value) \
-- 
cgit v1.2.3


From 823163ba6e52e644be5df4539a19e3df8d0988dd Mon Sep 17 00:00:00 2001
From: Manish Chopra <manishc@marvell.com>
Date: Thu, 2 Dec 2021 13:01:57 -0800
Subject: qed*: esl priv flag support through ethtool

ESL(Enhanced System Lockdown) was designed to lock PCI adapter firmware
images and prevent changes to critical non-volatile configuration data
so that uncontrolled, malicious or unintentional modification to the
adapters are avoided, ensuring it's operational state. Once this feature is
enabled, the device is locked, rejecting any modification to non-volatile
images. Once unlocked, the protection is off such that firmware and
non-volatile configurations may be altered.

Driver just reflects the capability and status of this through
the ethtool private flag.

Signed-off-by: Manish Chopra <manishc@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Alok Prasad <palok@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/qed/qed_if.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 9f4bfa2a4829..6dc4943d8aec 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -652,6 +652,7 @@ struct qed_dev_info {
 
 	bool wol_support;
 	bool smart_an;
+	bool esl;
 
 	/* MBI version */
 	u32 mbi_version;
@@ -1205,6 +1206,8 @@ struct qed_common_ops {
 
 	int (*get_sb_info)(struct qed_dev *cdev, struct qed_sb_info *sb,
 			   u16 qid, struct qed_sb_info_dbg *sb_dbg);
+
+	int (*get_esl_status)(struct qed_dev *cdev, bool *esl_active);
 };
 
 #define MASK_FIELD(_name, _value) \
-- 
cgit v1.2.3


From a3642021923b26d86bb27d88c826494827612c06 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 29 Nov 2021 18:46:47 +0100
Subject: locking/rtmutex: Add rt_mutex_lock_nest_lock() and
 rt_mutex_lock_killable().

The locking selftest for ww-mutex expects to operate directly on the
base-mutex which becomes a rtmutex on PREEMPT_RT.

Add a rtmutex based implementation of mutex_lock_nest_lock() and
mutex_lock_killable() named rt_mutex_lock_nest_lock() abd
rt_mutex_lock_killable().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211129174654.668506-5-bigeasy@linutronix.de
---
 include/linux/rtmutex.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index 9deedfeec2b1..7d049883a08a 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -99,13 +99,22 @@ extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 extern void rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass);
+extern void _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock);
 #define rt_mutex_lock(lock) rt_mutex_lock_nested(lock, 0)
+#define rt_mutex_lock_nest_lock(lock, nest_lock)			\
+	do {								\
+		typecheck(struct lockdep_map *, &(nest_lock)->dep_map);	\
+		_rt_mutex_lock_nest_lock(lock, &(nest_lock)->dep_map);	\
+	} while (0)
+
 #else
 extern void rt_mutex_lock(struct rt_mutex *lock);
 #define rt_mutex_lock_nested(lock, subclass) rt_mutex_lock(lock)
+#define rt_mutex_lock_nest_lock(lock, nest_lock) rt_mutex_lock(lock)
 #endif
 
 extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
+extern int rt_mutex_lock_killable(struct rt_mutex *lock);
 extern int rt_mutex_trylock(struct rt_mutex *lock);
 
 extern void rt_mutex_unlock(struct rt_mutex *lock);
-- 
cgit v1.2.3


From 0c1d7a2c2d32fac7ff4a644724b2d52a64184645 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 29 Nov 2021 18:46:48 +0100
Subject: lockdep: Remove softirq accounting on PREEMPT_RT.

There is not really a softirq context on PREEMPT_RT.  Softirqs on
PREEMPT_RT are always invoked within the context of a threaded
interrupt handler or within ksoftirqd. The "in-softirq" context is
preemptible and is protected by a per-CPU lock to ensure mutual
exclusion.

There is no difference on PREEMPT_RT between spin_lock_irq() and
spin_lock() because the former does not disable interrupts. Therefore
if a lock is used in_softirq() and locked once with spin_lock_irq()
then lockdep will report this with "inconsistent {SOFTIRQ-ON-W} ->
{IN-SOFTIRQ-W} usage".

Teach lockdep that we don't really do softirqs on -RT.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211129174654.668506-6-bigeasy@linutronix.de
---
 include/linux/irqflags.h | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 600c10da321a..4b140938b03e 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -71,14 +71,6 @@ do {						\
 do {						\
 	__this_cpu_dec(hardirq_context);	\
 } while (0)
-# define lockdep_softirq_enter()		\
-do {						\
-	current->softirq_context++;		\
-} while (0)
-# define lockdep_softirq_exit()			\
-do {						\
-	current->softirq_context--;		\
-} while (0)
 
 # define lockdep_hrtimer_enter(__hrtimer)		\
 ({							\
@@ -140,6 +132,21 @@ do {						\
 # define lockdep_irq_work_exit(__work)		do { } while (0)
 #endif
 
+#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT)
+# define lockdep_softirq_enter()		\
+do {						\
+	current->softirq_context++;		\
+} while (0)
+# define lockdep_softirq_exit()			\
+do {						\
+	current->softirq_context--;		\
+} while (0)
+
+#else
+# define lockdep_softirq_enter()		do { } while (0)
+# define lockdep_softirq_exit()			do { } while (0)
+#endif
+
 #if defined(CONFIG_IRQSOFF_TRACER) || \
 	defined(CONFIG_PREEMPT_TRACER)
  extern void stop_critical_timings(void);
-- 
cgit v1.2.3


From c0bed69daf4b67809b58cc7cd81a8fa4f45bc161 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Fri, 3 Dec 2021 15:59:34 +0800
Subject: locking: Make owner_on_cpu() into <linux/sched.h>

Move the owner_on_cpu() from kernel/locking/rwsem.c into
include/linux/sched.h with under CONFIG_SMP, then use it
in the mutex/rwsem/rtmutex to simplify the code.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211203075935.136808-2-wangkefeng.wang@huawei.com
---
 include/linux/sched.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78c351e35fec..ff609d9c2f21 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2171,6 +2171,15 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 #endif
 
 #ifdef CONFIG_SMP
+static inline bool owner_on_cpu(struct task_struct *owner)
+{
+	/*
+	 * As lock holder preemption issue, we both skip spinning if
+	 * task is not on cpu or its cpu is preempted
+	 */
+	return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
+}
+
 /* Returns effective CPU energy utilization, as seen by the scheduler */
 unsigned long sched_cpu_util(int cpu, unsigned long max);
 #endif /* CONFIG_SMP */
-- 
cgit v1.2.3


From 4cf75fd4a2545ca4deea992f929602c9fdbe8058 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Fri, 3 Dec 2021 15:59:35 +0800
Subject: locking: Mark racy reads of owner->on_cpu

One of the more frequent data races reported by KCSAN is the racy read
in mutex_spin_on_owner(), which is usually reported as "race of unknown
origin" without showing the writer. This is due to the racing write
occurring in kernel/sched. Locally enabling KCSAN in kernel/sched shows:

 | write (marked) to 0xffff97f205079934 of 4 bytes by task 316 on cpu 6:
 |  finish_task                kernel/sched/core.c:4632 [inline]
 |  finish_task_switch         kernel/sched/core.c:4848
 |  context_switch             kernel/sched/core.c:4975 [inline]
 |  __schedule                 kernel/sched/core.c:6253
 |  schedule                   kernel/sched/core.c:6326
 |  schedule_preempt_disabled  kernel/sched/core.c:6385
 |  __mutex_lock_common        kernel/locking/mutex.c:680
 |  __mutex_lock               kernel/locking/mutex.c:740 [inline]
 |  __mutex_lock_slowpath      kernel/locking/mutex.c:1028
 |  mutex_lock                 kernel/locking/mutex.c:283
 |  tty_open_by_driver         drivers/tty/tty_io.c:2062 [inline]
 |  ...
 |
 | read to 0xffff97f205079934 of 4 bytes by task 322 on cpu 3:
 |  mutex_spin_on_owner        kernel/locking/mutex.c:370
 |  mutex_optimistic_spin      kernel/locking/mutex.c:480
 |  __mutex_lock_common        kernel/locking/mutex.c:610
 |  __mutex_lock               kernel/locking/mutex.c:740 [inline]
 |  __mutex_lock_slowpath      kernel/locking/mutex.c:1028
 |  mutex_lock                 kernel/locking/mutex.c:283
 |  tty_open_by_driver         drivers/tty/tty_io.c:2062 [inline]
 |  ...
 |
 | value changed: 0x00000001 -> 0x00000000

This race is clearly intentional, and the potential for miscompilation
is slim due to surrounding barrier() and cpu_relax(), and the value
being used as a boolean.

Nevertheless, marking this reader would more clearly denote intent and
make it obvious that concurrency is expected. Use READ_ONCE() to avoid
having to reason about compiler optimizations now and in future.

With previous refactor, mark the read to owner->on_cpu in owner_on_cpu(),
which immediately precedes the loop executing mutex_spin_on_owner().

Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211203075935.136808-3-wangkefeng.wang@huawei.com
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ff609d9c2f21..0b9b0e3f4791 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2177,7 +2177,7 @@ static inline bool owner_on_cpu(struct task_struct *owner)
 	 * As lock holder preemption issue, we both skip spinning if
 	 * task is not on cpu or its cpu is preempted
 	 */
-	return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
+	return READ_ONCE(owner->on_cpu) && !vcpu_is_preempted(task_cpu(owner));
 }
 
 /* Returns effective CPU energy utilization, as seen by the scheduler */
-- 
cgit v1.2.3


From fb08a1908cb119a4585611d91461ab6d27756b14 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 29 Nov 2021 11:21:38 +0100
Subject: dax: simplify the dax_device <-> gendisk association

Replace the dax_host_hash with an xarray indexed by the pointer value
of the gendisk, and require explicitly calls from the block drivers that
want to associate their gendisk with a dax_device.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20211129102203.2243509-5-hch@lst.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dax.h | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index 8623caa67388..e2e9a67004cb 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -11,9 +11,11 @@
 
 typedef unsigned long dax_entry_t;
 
+struct dax_device;
+struct gendisk;
 struct iomap_ops;
 struct iomap;
-struct dax_device;
+
 struct dax_operations {
 	/*
 	 * direct_access: translate a device-relative
@@ -39,8 +41,8 @@ struct dax_operations {
 };
 
 #if IS_ENABLED(CONFIG_DAX)
-struct dax_device *alloc_dax(void *private, const char *host,
-		const struct dax_operations *ops, unsigned long flags);
+struct dax_device *alloc_dax(void *private, const struct dax_operations *ops,
+		unsigned long flags);
 void put_dax(struct dax_device *dax_dev);
 void kill_dax(struct dax_device *dax_dev);
 void dax_write_cache(struct dax_device *dax_dev, bool wc);
@@ -68,7 +70,7 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
 	return dax_synchronous(dax_dev);
 }
 #else
-static inline struct dax_device *alloc_dax(void *private, const char *host,
+static inline struct dax_device *alloc_dax(void *private,
 		const struct dax_operations *ops, unsigned long flags)
 {
 	/*
@@ -107,6 +109,8 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
 struct writeback_control;
 int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
 #if IS_ENABLED(CONFIG_FS_DAX)
+int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk);
+void dax_remove_host(struct gendisk *disk);
 bool generic_fsdax_supported(struct dax_device *dax_dev,
 		struct block_device *bdev, int blocksize, sector_t start,
 		sector_t sectors);
@@ -128,6 +132,13 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t st
 dax_entry_t dax_lock_page(struct page *page);
 void dax_unlock_page(struct page *page, dax_entry_t cookie);
 #else
+static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk)
+{
+	return 0;
+}
+static inline void dax_remove_host(struct gendisk *disk)
+{
+}
 #define generic_fsdax_supported		NULL
 
 static inline bool dax_supported(struct dax_device *dax_dev,
-- 
cgit v1.2.3


From 7b0800d00dae8c897398abaf61e82db0d67d7afc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 29 Nov 2021 11:21:42 +0100
Subject: dax: remove dax_capable

Just open code the block size and dax_dev == NULL checks in the callers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com> [erofs]
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20211129102203.2243509-9-hch@lst.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dax.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index e2e9a67004cb..439c3c70e347 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -111,12 +111,6 @@ int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
 #if IS_ENABLED(CONFIG_FS_DAX)
 int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk);
 void dax_remove_host(struct gendisk *disk);
-bool generic_fsdax_supported(struct dax_device *dax_dev,
-		struct block_device *bdev, int blocksize, sector_t start,
-		sector_t sectors);
-
-bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
-		int blocksize, sector_t start, sector_t len);
 
 static inline void fs_put_dax(struct dax_device *dax_dev)
 {
@@ -139,14 +133,6 @@ static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk)
 static inline void dax_remove_host(struct gendisk *disk)
 {
 }
-#define generic_fsdax_supported		NULL
-
-static inline bool dax_supported(struct dax_device *dax_dev,
-		struct block_device *bdev, int blocksize, sector_t start,
-		sector_t len)
-{
-	return false;
-}
 
 static inline void fs_put_dax(struct dax_device *dax_dev)
 {
-- 
cgit v1.2.3


From 60696eb26a37ab0199f7833ddbc1b75138c36d16 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 29 Nov 2021 11:21:48 +0100
Subject: fsdax: simplify the pgoff calculation

Replace the two steps of dax_iomap_sector and bdev_dax_pgoff with a
single dax_iomap_pgoff helper that avoids lots of cumbersome sector
conversions.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20211129102203.2243509-15-hch@lst.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dax.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index 439c3c70e347..324363b798ec 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -107,7 +107,6 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
 #endif
 
 struct writeback_control;
-int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
 #if IS_ENABLED(CONFIG_FS_DAX)
 int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk);
 void dax_remove_host(struct gendisk *disk);
-- 
cgit v1.2.3


From c6f40468657d16e4010ef84bf32a761feb3469ea Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 29 Nov 2021 11:21:52 +0100
Subject: fsdax: decouple zeroing from the iomap buffered I/O code

Unshare the DAX and iomap buffered I/O page zeroing code.  This code
previously did a IS_DAX check deep inside the iomap code, which in
fact was the only DAX check in the code.  Instead move these checks
into the callers.  Most callers already have DAX special casing anyway
and XFS will need it for reflink support as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20211129102203.2243509-19-hch@lst.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dax.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index 324363b798ec..b79036743e7f 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -14,6 +14,7 @@ typedef unsigned long dax_entry_t;
 struct dax_device;
 struct gendisk;
 struct iomap_ops;
+struct iomap_iter;
 struct iomap;
 
 struct dax_operations {
@@ -170,6 +171,11 @@ static inline void dax_unlock_page(struct page *page, dax_entry_t cookie)
 }
 #endif
 
+int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
+		const struct iomap_ops *ops);
+int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
+		const struct iomap_ops *ops);
+
 #if IS_ENABLED(CONFIG_DAX)
 int dax_read_lock(void);
 void dax_read_unlock(int id);
@@ -204,7 +210,6 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 				      pgoff_t index);
-s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap);
 static inline bool dax_mapping(struct address_space *mapping)
 {
 	return mapping->host && IS_DAX(mapping->host);
-- 
cgit v1.2.3


From 952da06375c8f3aa58474fff718d9ae8442531b9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 29 Nov 2021 11:21:58 +0100
Subject: iomap: add a IOMAP_DAX flag

Add a flag so that the file system can easily detect DAX operations
based just on the iomap operation requested instead of looking at
inode state using IS_DAX.  This will be needed to apply the to be
added partition offset only for operations that actually use DAX,
but not things like fiemap that are based on the block device.
In the long run it should also allow turning the bdev, dax_dev
and inline_data into a union.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20211129102203.2243509-25-hch@lst.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/iomap.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 6d1b08d0ae93..5b9432f9f79e 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -141,6 +141,11 @@ struct iomap_page_ops {
 #define IOMAP_NOWAIT		(1 << 5) /* do not block */
 #define IOMAP_OVERWRITE_ONLY	(1 << 6) /* only pure overwrites allowed */
 #define IOMAP_UNSHARE		(1 << 7) /* unshare_file_range */
+#ifdef CONFIG_FS_DAX
+#define IOMAP_DAX		(1 << 8) /* DAX mapping */
+#else
+#define IOMAP_DAX		0
+#endif /* CONFIG_FS_DAX */
 
 struct iomap_ops {
 	/*
-- 
cgit v1.2.3


From cd913c76f489def1a388e3a5b10df94948ede3f5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 29 Nov 2021 11:21:59 +0100
Subject: dax: return the partition offset from fs_dax_get_by_bdev

Prepare for the removal of the block_device from the DAX I/O path by
returning the partition offset from fs_dax_get_by_bdev so that the file
systems have it at hand for use during I/O.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20211129102203.2243509-26-hch@lst.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dax.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index b79036743e7f..f6f353382cc9 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -117,7 +117,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev)
 	put_dax(dax_dev);
 }
 
-struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
+struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
+		u64 *start_off);
 int dax_writeback_mapping_range(struct address_space *mapping,
 		struct dax_device *dax_dev, struct writeback_control *wbc);
 
@@ -138,7 +139,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev)
 {
 }
 
-static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
+static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
+		u64 *start_off)
 {
 	return NULL;
 }
-- 
cgit v1.2.3


From 2ede892342b3c628991ff1b9060108a7edd92d94 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 29 Nov 2021 11:22:01 +0100
Subject: dax: fix up some of the block device related ifdefs

The DAX device <-> block device association is only enabled if
CONFIG_BLOCK is enabled.  Update dax.h to account for that and use
the right conditions for the fs_put_dax stub as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20211129102203.2243509-28-hch@lst.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dax.h | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index f6f353382cc9..87ae4c9b1d65 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -108,24 +108,15 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
 #endif
 
 struct writeback_control;
-#if IS_ENABLED(CONFIG_FS_DAX)
+#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
 int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk);
 void dax_remove_host(struct gendisk *disk);
-
+struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
+		u64 *start_off);
 static inline void fs_put_dax(struct dax_device *dax_dev)
 {
 	put_dax(dax_dev);
 }
-
-struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
-		u64 *start_off);
-int dax_writeback_mapping_range(struct address_space *mapping,
-		struct dax_device *dax_dev, struct writeback_control *wbc);
-
-struct page *dax_layout_busy_page(struct address_space *mapping);
-struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);
-dax_entry_t dax_lock_page(struct page *page);
-void dax_unlock_page(struct page *page, dax_entry_t cookie);
 #else
 static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk)
 {
@@ -134,17 +125,25 @@ static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk)
 static inline void dax_remove_host(struct gendisk *disk)
 {
 }
-
-static inline void fs_put_dax(struct dax_device *dax_dev)
-{
-}
-
 static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
 		u64 *start_off)
 {
 	return NULL;
 }
+static inline void fs_put_dax(struct dax_device *dax_dev)
+{
+}
+#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */
 
+#if IS_ENABLED(CONFIG_FS_DAX)
+int dax_writeback_mapping_range(struct address_space *mapping,
+		struct dax_device *dax_dev, struct writeback_control *wbc);
+
+struct page *dax_layout_busy_page(struct address_space *mapping);
+struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);
+dax_entry_t dax_lock_page(struct page *page);
+void dax_unlock_page(struct page *page, dax_entry_t cookie);
+#else
 static inline struct page *dax_layout_busy_page(struct address_space *mapping)
 {
 	return NULL;
-- 
cgit v1.2.3


From 866de407444398bc8140ea70de1dba5f91cc34ac Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Fri, 3 Dec 2021 13:30:01 +0800
Subject: bpf: Disallow BPF_LOG_KERNEL log level for bpf(BPF_BTF_LOAD)

BPF_LOG_KERNEL is only used internally, so disallow bpf_btf_load()
to set log level as BPF_LOG_KERNEL. The same checking has already
been done in bpf_check(), so factor out a helper to check the
validity of log attributes and use it in both places.

Fixes: 8580ac9404f6 ("bpf: Process in-kernel BTF")
Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20211203053001.740945-1-houtao1@huawei.com
---
 include/linux/bpf_verifier.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index c8a78e830fca..182b16a91084 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -396,6 +396,13 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
 		 log->level == BPF_LOG_KERNEL);
 }
 
+static inline bool
+bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log)
+{
+	return log->len_total >= 128 && log->len_total <= UINT_MAX >> 2 &&
+	       log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK);
+}
+
 #define BPF_MAX_SUBPROGS 256
 
 struct bpf_subprog_info {
-- 
cgit v1.2.3


From ccf7cf92373d1a53166582013430b3b9c05a6ba2 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 16 Jul 2021 09:39:15 -0500
Subject: f2fs: fix the f2fs_file_write_iter tracepoint

Pass in the original position and count rather than the position and
count that were updated by the write.  Also use the correct types for
all arguments, in particular the file offset which was being truncated
to 32 bits on 32-bit platforms.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/trace/events/f2fs.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index f8cb916f3595..dcb94d740e12 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -540,17 +540,17 @@ TRACE_EVENT(f2fs_truncate_partial_nodes,
 
 TRACE_EVENT(f2fs_file_write_iter,
 
-	TP_PROTO(struct inode *inode, unsigned long offset,
-		unsigned long length, int ret),
+	TP_PROTO(struct inode *inode, loff_t offset, size_t length,
+		 ssize_t ret),
 
 	TP_ARGS(inode, offset, length, ret),
 
 	TP_STRUCT__entry(
 		__field(dev_t,	dev)
 		__field(ino_t,	ino)
-		__field(unsigned long, offset)
-		__field(unsigned long, length)
-		__field(int,	ret)
+		__field(loff_t, offset)
+		__field(size_t, length)
+		__field(ssize_t, ret)
 	),
 
 	TP_fast_assign(
@@ -562,7 +562,7 @@ TRACE_EVENT(f2fs_file_write_iter,
 	),
 
 	TP_printk("dev = (%d,%d), ino = %lu, "
-		"offset = %lu, length = %lu, written(err) = %d",
+		"offset = %lld, length = %zu, written(err) = %zd",
 		show_dev_ino(__entry),
 		__entry->offset,
 		__entry->length,
-- 
cgit v1.2.3


From b80892ca022e9eb484771a66eb68e12364695a2a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 Oct 2021 17:10:17 +0200
Subject: memremap: remove support for external pgmap refcounts

No driver is left using the external pgmap refcount, so remove the
code to support it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/20211028151017.50234-1-hch@lst.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/memremap.h | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index c0e9d35889e8..a8bc588fe7aa 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -72,16 +72,6 @@ struct dev_pagemap_ops {
 	 */
 	void (*page_free)(struct page *page);
 
-	/*
-	 * Transition the refcount in struct dev_pagemap to the dead state.
-	 */
-	void (*kill)(struct dev_pagemap *pgmap);
-
-	/*
-	 * Wait for refcount in struct dev_pagemap to be idle and reap it.
-	 */
-	void (*cleanup)(struct dev_pagemap *pgmap);
-
 	/*
 	 * Used for private (un-addressable) device memory only.  Must migrate
 	 * the page back to a CPU accessible page.
@@ -95,8 +85,7 @@ struct dev_pagemap_ops {
  * struct dev_pagemap - metadata for ZONE_DEVICE mappings
  * @altmap: pre-allocated/reserved memory for vmemmap allocations
  * @ref: reference count that pins the devm_memremap_pages() mapping
- * @internal_ref: internal reference if @ref is not provided by the caller
- * @done: completion for @internal_ref
+ * @done: completion for @ref
  * @type: memory type: see MEMORY_* in memory_hotplug.h
  * @flags: PGMAP_* flags to specify defailed behavior
  * @ops: method table
@@ -109,8 +98,7 @@ struct dev_pagemap_ops {
  */
 struct dev_pagemap {
 	struct vmem_altmap altmap;
-	struct percpu_ref *ref;
-	struct percpu_ref internal_ref;
+	struct percpu_ref ref;
 	struct completion done;
 	enum memory_type type;
 	unsigned int flags;
@@ -191,7 +179,7 @@ static inline unsigned long memremap_compat_align(void)
 static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
 {
 	if (pgmap)
-		percpu_ref_put(pgmap->ref);
+		percpu_ref_put(&pgmap->ref);
 }
 
 #endif /* _LINUX_MEMREMAP_H_ */
-- 
cgit v1.2.3


From 02e4079913500f24ceb082d8d87d8665f044b298 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Fri, 3 Dec 2021 12:17:04 +0100
Subject: fs: remove unused low-level mapping helpers

Now that we ported all places to use the new low-level mapping helpers
that are able to support filesystems mounted with an idmapping we can
remove the old low-level mapping helpers. With the removal of these old
helpers we also conclude the renaming of the mapping helpers we started
in commit a65e58e791a1 ("fs: document and rename fsid helpers").

Link: https://lore.kernel.org/r/20211123114227.3124056-8-brauner@kernel.org (v1)
Link: https://lore.kernel.org/r/20211130121032.3753852-8-brauner@kernel.org (v2)
Link: https://lore.kernel.org/r/20211203111707.3901969-8-brauner@kernel.org
Cc: Seth Forshee <sforshee@digitalocean.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
CC: linux-fsdevel@vger.kernel.org
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Seth Forshee <sforshee@digitalocean.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 include/linux/mnt_idmapping.h | 56 -------------------------------------------
 1 file changed, 56 deletions(-)

(limited to 'include')

diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
index 60341cd33ccc..0c6ab3f4c952 100644
--- a/include/linux/mnt_idmapping.h
+++ b/include/linux/mnt_idmapping.h
@@ -13,62 +13,6 @@ struct user_namespace;
  */
 extern struct user_namespace init_user_ns;
 
-/**
- * kuid_into_mnt - map a kuid down into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- * @kuid: kuid to be mapped
- *
- * Return: @kuid mapped according to @mnt_userns.
- * If @kuid has no mapping INVALID_UID is returned.
- */
-static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns,
-				   kuid_t kuid)
-{
-	return make_kuid(mnt_userns, __kuid_val(kuid));
-}
-
-/**
- * kgid_into_mnt - map a kgid down into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- * @kgid: kgid to be mapped
- *
- * Return: @kgid mapped according to @mnt_userns.
- * If @kgid has no mapping INVALID_GID is returned.
- */
-static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns,
-				   kgid_t kgid)
-{
-	return make_kgid(mnt_userns, __kgid_val(kgid));
-}
-
-/**
- * kuid_from_mnt - map a kuid up into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- * @kuid: kuid to be mapped
- *
- * Return: @kuid mapped up according to @mnt_userns.
- * If @kuid has no mapping INVALID_UID is returned.
- */
-static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns,
-				   kuid_t kuid)
-{
-	return KUIDT_INIT(from_kuid(mnt_userns, kuid));
-}
-
-/**
- * kgid_from_mnt - map a kgid up into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- * @kgid: kgid to be mapped
- *
- * Return: @kgid mapped up according to @mnt_userns.
- * If @kgid has no mapping INVALID_GID is returned.
- */
-static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns,
-				   kgid_t kgid)
-{
-	return KGIDT_INIT(from_kgid(mnt_userns, kgid));
-}
-
 /**
  * initial_idmapping - check whether this is the initial mapping
  * @ns: idmapping to check
-- 
cgit v1.2.3


From 209188ce75d0d357c292f6bb81d712acdd4e7db7 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Fri, 3 Dec 2021 12:17:05 +0100
Subject: fs: port higher-level mapping helpers

Enable the mapped_fs{g,u}id() helpers to support filesystems mounted
with an idmapping. Apart from core mapping helpers that use
mapped_fs{g,u}id() to initialize struct inode's i_{g,u}id fields xfs is
the only place that uses these low-level helpers directly.

The patch only extends the helpers to be able to take the filesystem
idmapping into account. Since we don't actually yet pass the
filesystem's idmapping in no functional changes happen. This will happen
in a final patch.

Link: https://lore.kernel.org/r/20211123114227.3124056-9-brauner@kernel.org (v1)
Link: https://lore.kernel.org/r/20211130121032.3753852-9-brauner@kernel.org (v2)
Link: https://lore.kernel.org/r/20211203111707.3901969-9-brauner@kernel.org
Cc: Seth Forshee <sforshee@digitalocean.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
CC: linux-fsdevel@vger.kernel.org
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Seth Forshee <sforshee@digitalocean.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 include/linux/fs.h            |  8 ++++----
 include/linux/mnt_idmapping.h | 12 ++++++++----
 2 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 57aee6ebba72..e1f28f757f1b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1664,7 +1664,7 @@ static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
 static inline void inode_fsuid_set(struct inode *inode,
 				   struct user_namespace *mnt_userns)
 {
-	inode->i_uid = mapped_fsuid(mnt_userns);
+	inode->i_uid = mapped_fsuid(mnt_userns, &init_user_ns);
 }
 
 /**
@@ -1678,7 +1678,7 @@ static inline void inode_fsuid_set(struct inode *inode,
 static inline void inode_fsgid_set(struct inode *inode,
 				   struct user_namespace *mnt_userns)
 {
-	inode->i_gid = mapped_fsgid(mnt_userns);
+	inode->i_gid = mapped_fsgid(mnt_userns, &init_user_ns);
 }
 
 /**
@@ -1699,10 +1699,10 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
 	kuid_t kuid;
 	kgid_t kgid;
 
-	kuid = mapped_fsuid(mnt_userns);
+	kuid = mapped_fsuid(mnt_userns, &init_user_ns);
 	if (!uid_valid(kuid))
 		return false;
-	kgid = mapped_fsgid(mnt_userns);
+	kgid = mapped_fsgid(mnt_userns, &init_user_ns);
 	if (!gid_valid(kgid))
 		return false;
 	return kuid_has_mapping(fs_userns, kuid) &&
diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
index 0c6ab3f4c952..ee5a217de2a8 100644
--- a/include/linux/mnt_idmapping.h
+++ b/include/linux/mnt_idmapping.h
@@ -196,6 +196,7 @@ static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns,
 /**
  * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns
  * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
  *
  * Use this helper to initialize a new vfs or filesystem object based on
  * the caller's fsuid. A common example is initializing the i_uid field of
@@ -205,14 +206,16 @@ static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns,
  *
  * Return: the caller's current fsuid mapped up according to @mnt_userns.
  */
-static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns)
+static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns,
+				  struct user_namespace *fs_userns)
 {
-	return mapped_kuid_user(mnt_userns, &init_user_ns, current_fsuid());
+	return mapped_kuid_user(mnt_userns, fs_userns, current_fsuid());
 }
 
 /**
  * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns
  * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
  *
  * Use this helper to initialize a new vfs or filesystem object based on
  * the caller's fsgid. A common example is initializing the i_gid field of
@@ -222,9 +225,10 @@ static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns)
  *
  * Return: the caller's current fsgid mapped up according to @mnt_userns.
  */
-static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns)
+static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns,
+				  struct user_namespace *fs_userns)
 {
-	return mapped_kgid_user(mnt_userns, &init_user_ns, current_fsgid());
+	return mapped_kgid_user(mnt_userns, fs_userns, current_fsgid());
 }
 
 #endif /* _LINUX_MNT_IDMAPPING_H */
-- 
cgit v1.2.3


From a1ec9040a2a9122605ac26e5725c6de019184419 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Fri, 3 Dec 2021 12:17:06 +0100
Subject: fs: add i_user_ns() helper

Since we'll be passing the filesystem's idmapping in even more places in
the following patches and we do already dereference struct inode to get
to the filesystem's idmapping multiple times add a tiny helper.

Link: https://lore.kernel.org/r/20211123114227.3124056-10-brauner@kernel.org (v1)
Link: https://lore.kernel.org/r/20211130121032.3753852-10-brauner@kernel.org (v2)
Link: https://lore.kernel.org/r/20211203111707.3901969-10-brauner@kernel.org
Cc: Seth Forshee <sforshee@digitalocean.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
CC: linux-fsdevel@vger.kernel.org
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Seth Forshee <sforshee@digitalocean.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 include/linux/fs.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index e1f28f757f1b..3d6d514943ab 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1600,6 +1600,11 @@ struct super_block {
 	struct list_head	s_inodes_wb;	/* writeback inodes */
 } __randomize_layout;
 
+static inline struct user_namespace *i_user_ns(const struct inode *inode)
+{
+	return inode->i_sb->s_user_ns;
+}
+
 /* Helper functions so that in most cases filesystems will
  * not need to deal directly with kuid_t and kgid_t and can
  * instead deal with the raw numeric values that are stored
@@ -1607,22 +1612,22 @@ struct super_block {
  */
 static inline uid_t i_uid_read(const struct inode *inode)
 {
-	return from_kuid(inode->i_sb->s_user_ns, inode->i_uid);
+	return from_kuid(i_user_ns(inode), inode->i_uid);
 }
 
 static inline gid_t i_gid_read(const struct inode *inode)
 {
-	return from_kgid(inode->i_sb->s_user_ns, inode->i_gid);
+	return from_kgid(i_user_ns(inode), inode->i_gid);
 }
 
 static inline void i_uid_write(struct inode *inode, uid_t uid)
 {
-	inode->i_uid = make_kuid(inode->i_sb->s_user_ns, uid);
+	inode->i_uid = make_kuid(i_user_ns(inode), uid);
 }
 
 static inline void i_gid_write(struct inode *inode, gid_t gid)
 {
-	inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid);
+	inode->i_gid = make_kgid(i_user_ns(inode), gid);
 }
 
 /**
-- 
cgit v1.2.3


From bd303368b776eead1c29e6cdda82bde7128b82a7 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Fri, 3 Dec 2021 12:17:07 +0100
Subject: fs: support mapped mounts of mapped filesystems

In previous patches we added new and modified existing helpers to handle
idmapped mounts of filesystems mounted with an idmapping. In this final
patch we convert all relevant places in the vfs to actually pass the
filesystem's idmapping into these helpers.

With this the vfs is in shape to handle idmapped mounts of filesystems
mounted with an idmapping. Note that this is just the generic
infrastructure. Actually adding support for idmapped mounts to a
filesystem mountable with an idmapping is follow-up work.

In this patch we extend the definition of an idmapped mount from a mount
that that has the initial idmapping attached to it to a mount that has
an idmapping attached to it which is not the same as the idmapping the
filesystem was mounted with.

As before we do not allow the initial idmapping to be attached to a
mount. In addition this patch prevents that the idmapping the filesystem
was mounted with can be attached to a mount created based on this
filesystem.

This has multiple reasons and advantages. First, attaching the initial
idmapping or the filesystem's idmapping doesn't make much sense as in
both cases the values of the i_{g,u}id and other places where k{g,u}ids
are used do not change. Second, a user that really wants to do this for
whatever reason can just create a separate dedicated identical idmapping
to attach to the mount. Third, we can continue to use the initial
idmapping as an indicator that a mount is not idmapped allowing us to
continue to keep passing the initial idmapping into the mapping helpers
to tell them that something isn't an idmapped mount even if the
filesystem is mounted with an idmapping.

Link: https://lore.kernel.org/r/20211123114227.3124056-11-brauner@kernel.org (v1)
Link: https://lore.kernel.org/r/20211130121032.3753852-11-brauner@kernel.org (v2)
Link: https://lore.kernel.org/r/20211203111707.3901969-11-brauner@kernel.org
Cc: Seth Forshee <sforshee@digitalocean.com>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
CC: linux-fsdevel@vger.kernel.org
Reviewed-by: Seth Forshee <sforshee@digitalocean.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 include/linux/fs.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3d6d514943ab..493b87e3616b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1641,7 +1641,7 @@ static inline void i_gid_write(struct inode *inode, gid_t gid)
 static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
 				    const struct inode *inode)
 {
-	return mapped_kuid_fs(mnt_userns, &init_user_ns, inode->i_uid);
+	return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid);
 }
 
 /**
@@ -1655,7 +1655,7 @@ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
 static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
 				    const struct inode *inode)
 {
-	return mapped_kgid_fs(mnt_userns, &init_user_ns, inode->i_gid);
+	return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid);
 }
 
 /**
@@ -1669,7 +1669,7 @@ static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
 static inline void inode_fsuid_set(struct inode *inode,
 				   struct user_namespace *mnt_userns)
 {
-	inode->i_uid = mapped_fsuid(mnt_userns, &init_user_ns);
+	inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode));
 }
 
 /**
@@ -1683,7 +1683,7 @@ static inline void inode_fsuid_set(struct inode *inode,
 static inline void inode_fsgid_set(struct inode *inode,
 				   struct user_namespace *mnt_userns)
 {
-	inode->i_gid = mapped_fsgid(mnt_userns, &init_user_ns);
+	inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode));
 }
 
 /**
@@ -1704,10 +1704,10 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
 	kuid_t kuid;
 	kgid_t kgid;
 
-	kuid = mapped_fsuid(mnt_userns, &init_user_ns);
+	kuid = mapped_fsuid(mnt_userns, fs_userns);
 	if (!uid_valid(kuid))
 		return false;
-	kgid = mapped_fsgid(mnt_userns, &init_user_ns);
+	kgid = mapped_fsgid(mnt_userns, fs_userns);
 	if (!gid_valid(kgid))
 		return false;
 	return kuid_has_mapping(fs_userns, kuid) &&
@@ -2653,13 +2653,14 @@ static inline struct user_namespace *file_mnt_user_ns(struct file *file)
  * is_idmapped_mnt - check whether a mount is mapped
  * @mnt: the mount to check
  *
- * If @mnt has an idmapping attached to it @mnt is mapped.
+ * If @mnt has an idmapping attached different from the
+ * filesystem's idmapping then @mnt is mapped.
  *
  * Return: true if mount is mapped, false if not.
  */
 static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
 {
-	return mnt_user_ns(mnt) != &init_user_ns;
+	return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns;
 }
 
 extern long vfs_truncate(const struct path *, loff_t);
-- 
cgit v1.2.3


From 019cd8a9e3bcbbf6bac8036a9ae545f7858e0c08 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Tue, 2 Nov 2021 16:02:01 -0600
Subject: ARM: ixp4xx: remove unused header file pata_ixp4xx_cf.h

Commit b00ced38e317 ("ARM: ixp4xx: Delete Avila boardfiles") removed the
last use of <linux/platform_data/pata_ixp4xx_cf.h> but left the header file
in place.  Nothing uses this file, delete it now.

Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/platform_data/pata_ixp4xx_cf.h | 21 ---------------------
 1 file changed, 21 deletions(-)
 delete mode 100644 include/linux/platform_data/pata_ixp4xx_cf.h

(limited to 'include')

diff --git a/include/linux/platform_data/pata_ixp4xx_cf.h b/include/linux/platform_data/pata_ixp4xx_cf.h
deleted file mode 100644
index e60fa41da4a5..000000000000
--- a/include/linux/platform_data/pata_ixp4xx_cf.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __PLATFORM_DATA_PATA_IXP4XX_H
-#define __PLATFORM_DATA_PATA_IXP4XX_H
-
-#include <linux/types.h>
-
-/*
- * This structure provide a means for the board setup code
- * to give information to th pata_ixp4xx driver. It is
- * passed as platform_data.
- */
-struct ixp4xx_pata_data {
-	volatile u32	*cs0_cfg;
-	volatile u32	*cs1_cfg;
-	unsigned long	cs0_bits;
-	unsigned long	cs1_bits;
-	void __iomem	*cmd;
-	void __iomem	*ctl;
-};
-
-#endif
-- 
cgit v1.2.3


From 94aedac49d92b22995d7b9092c6551b8b9924320 Mon Sep 17 00:00:00 2001
From: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Date: Thu, 4 Nov 2021 09:16:20 +0200
Subject: iommu: Log iova range in map/unmap trace events

In case of an iommu page fault, the faulting iova is logged
in trace_io_page_fault. It is therefore convenient to log
the iova range in mapping/unmapping trace events so that it
is easier to see if the faulting iova was recently in any of
those ranges.

Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Link: https://lore.kernel.org/r/20211104071620.27290-1-dafna.hirschfeld@collabora.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/trace/events/iommu.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/iommu.h b/include/trace/events/iommu.h
index 72b4582322ff..29096fe12623 100644
--- a/include/trace/events/iommu.h
+++ b/include/trace/events/iommu.h
@@ -101,8 +101,9 @@ TRACE_EVENT(map,
 		__entry->size = size;
 	),
 
-	TP_printk("IOMMU: iova=0x%016llx paddr=0x%016llx size=%zu",
-			__entry->iova, __entry->paddr, __entry->size
+	TP_printk("IOMMU: iova=0x%016llx - 0x%016llx paddr=0x%016llx size=%zu",
+		  __entry->iova, __entry->iova + __entry->size, __entry->paddr,
+		  __entry->size
 	)
 );
 
@@ -124,8 +125,9 @@ TRACE_EVENT(unmap,
 		__entry->unmapped_size = unmapped_size;
 	),
 
-	TP_printk("IOMMU: iova=0x%016llx size=%zu unmapped_size=%zu",
-			__entry->iova, __entry->size, __entry->unmapped_size
+	TP_printk("IOMMU: iova=0x%016llx - 0x%016llx size=%zu unmapped_size=%zu",
+		  __entry->iova, __entry->iova + __entry->size,
+		  __entry->size, __entry->unmapped_size
 	)
 );
 
-- 
cgit v1.2.3


From 063ebb19d962b45a1b505748d464bd12b5074797 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Wed, 1 Dec 2021 17:33:21 +0000
Subject: iommu/virtio: Add definitions for VIRTIO_IOMMU_F_BYPASS_CONFIG

Add definitions for the VIRTIO_IOMMU_F_BYPASS_CONFIG, which supersedes
VIRTIO_IOMMU_F_BYPASS.

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/r/20211201173323.1045819-2-jean-philippe@linaro.org
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/uapi/linux/virtio_iommu.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
index 237e36a280cb..1ff357f0d72e 100644
--- a/include/uapi/linux/virtio_iommu.h
+++ b/include/uapi/linux/virtio_iommu.h
@@ -16,6 +16,7 @@
 #define VIRTIO_IOMMU_F_BYPASS			3
 #define VIRTIO_IOMMU_F_PROBE			4
 #define VIRTIO_IOMMU_F_MMIO			5
+#define VIRTIO_IOMMU_F_BYPASS_CONFIG		6
 
 struct virtio_iommu_range_64 {
 	__le64					start;
@@ -36,6 +37,8 @@ struct virtio_iommu_config {
 	struct virtio_iommu_range_32		domain_range;
 	/* Probe buffer size */
 	__le32					probe_size;
+	__u8					bypass;
+	__u8					reserved[3];
 };
 
 /* Request types */
@@ -66,11 +69,14 @@ struct virtio_iommu_req_tail {
 	__u8					reserved[3];
 };
 
+#define VIRTIO_IOMMU_ATTACH_F_BYPASS		(1 << 0)
+
 struct virtio_iommu_req_attach {
 	struct virtio_iommu_req_head		head;
 	__le32					domain;
 	__le32					endpoint;
-	__u8					reserved[8];
+	__le32					flags;
+	__u8					reserved[4];
 	struct virtio_iommu_req_tail		tail;
 };
 
-- 
cgit v1.2.3


From 52f982f00b220d097a71a23c149a1d18efc08e63 Mon Sep 17 00:00:00 2001
From: Ondrej Mosnacek <omosnace@redhat.com>
Date: Mon, 6 Dec 2021 14:24:06 +0100
Subject: security,selinux: remove security_add_mnt_opt()

Its last user has been removed in commit f2aedb713c28 ("NFS: Add
fs_context support.").

Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Reviewed-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/lsm_hook_defs.h | 2 --
 include/linux/lsm_hooks.h     | 2 --
 include/linux/security.h      | 8 --------
 3 files changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index ae2228f0711d..a5a724c308d8 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -78,8 +78,6 @@ LSM_HOOK(int, 0, sb_set_mnt_opts, struct super_block *sb, void *mnt_opts,
 LSM_HOOK(int, 0, sb_clone_mnt_opts, const struct super_block *oldsb,
 	 struct super_block *newsb, unsigned long kern_flags,
 	 unsigned long *set_kern_flags)
-LSM_HOOK(int, 0, sb_add_mnt_opt, const char *option, const char *val,
-	 int len, void **mnt_opts)
 LSM_HOOK(int, 0, move_mount, const struct path *from_path,
 	 const struct path *to_path)
 LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry,
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 52c1990644b9..3bf5c658bc44 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -180,8 +180,6 @@
  *	Copy all security options from a given superblock to another
  *	@oldsb old superblock which contain information to clone
  *	@newsb new superblock which needs filled in
- * @sb_add_mnt_opt:
- * 	Add one mount @option to @mnt_opts.
  * @sb_parse_opts_str:
  *	Parse a string of security data filling in the opts structure
  *	@options string containing all mount options known by the LSM
diff --git a/include/linux/security.h b/include/linux/security.h
index bb301963e333..6d72772182c8 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -313,8 +313,6 @@ int security_sb_clone_mnt_opts(const struct super_block *oldsb,
 				struct super_block *newsb,
 				unsigned long kern_flags,
 				unsigned long *set_kern_flags);
-int security_add_mnt_opt(const char *option, const char *val,
-				int len, void **mnt_opts);
 int security_move_mount(const struct path *from_path, const struct path *to_path);
 int security_dentry_init_security(struct dentry *dentry, int mode,
 				  const struct qstr *name,
@@ -711,12 +709,6 @@ static inline int security_sb_clone_mnt_opts(const struct super_block *oldsb,
 	return 0;
 }
 
-static inline int security_add_mnt_opt(const char *option, const char *val,
-					int len, void **mnt_opts)
-{
-	return 0;
-}
-
 static inline int security_move_mount(const struct path *from_path,
 				      const struct path *to_path)
 {
-- 
cgit v1.2.3


From 8ab30a331946c34e4ba022c44df8624acea1c74e Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Mon, 6 Dec 2021 20:49:48 +0800
Subject: blk-mq: Drop busy_iter_fn blk_mq_hw_ctx argument

The only user of blk_mq_hw_ctx blk_mq_hw_ctx argument is
blk_mq_rq_inflight().

Function blk_mq_rq_inflight() uses the hctx to find the associated request
queue to match against the request. However this same check is already
done in caller bt_iter(), so drop this check.

With that change there are no more users of busy_iter_fn blk_mq_hw_ctx
argument, so drop the argument.

Reviewed-by Hannes Reinecke <hare@suse.de>

Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Tested-by: Kashyap Desai <kashyap.desai@broadcom.com>
Link: https://lore.kernel.org/r/1638794990-137490-2-git-send-email-john.garry@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ecdc049b52fa..17ebf29e42d8 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -470,8 +470,7 @@ struct blk_mq_queue_data {
 	bool last;
 };
 
-typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
-		bool);
+typedef bool (busy_iter_fn)(struct request *, void *, bool);
 typedef bool (busy_tag_iter_fn)(struct request *, void *, bool);
 
 /**
-- 
cgit v1.2.3


From fc39f8d2d1c10ac04976b0a247865bb0cec4dd88 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Mon, 6 Dec 2021 20:49:49 +0800
Subject: blk-mq: Delete busy_iter_fn

Typedefs busy_iter_fn and busy_tag_iter_fn are now identical, so delete
busy_iter_fn to reduce duplication.

It would be nicer to delete busy_tag_iter_fn, as the name busy_iter_fn is
less specific.

However busy_tag_iter_fn is used in many different parts of the tree,
unlike busy_iter_fn which is just use in block/, so just take the
straightforward path now, so that we could rename later treewide.

Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Tested-by: Kashyap Desai <kashyap.desai@broadcom.com>
Link: https://lore.kernel.org/r/1638794990-137490-3-git-send-email-john.garry@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 17ebf29e42d8..772f8f921526 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -470,7 +470,6 @@ struct blk_mq_queue_data {
 	bool last;
 };
 
-typedef bool (busy_iter_fn)(struct request *, void *, bool);
 typedef bool (busy_tag_iter_fn)(struct request *, void *, bool);
 
 /**
-- 
cgit v1.2.3


From 05770dd0ad110854c7157d95700d7c89979cdb3e Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Mon, 22 Nov 2021 18:30:12 +0900
Subject: tracing: Support __rel_loc relative dynamic data location attribute

Add '__rel_loc' new dynamic data location attribute which encodes
the data location from the next to the field itself.

The '__data_loc' is used for encoding the dynamic data location on
the trace event record. But '__data_loc' is not useful if the writer
doesn't know the event header (e.g. user event), because it records
the dynamic data offset from the entry of the record, not the field
itself.

This new '__rel_loc' attribute encodes the data location relatively
from the next of the field. For example, when there is a record like
below (the number in the parentheses is the size of fields)

 |header(N)|common(M)|fields(K)|__data_loc(4)|fields(L)|data(G)|

In this case, '__data_loc' field will be

 __data_loc = (G << 16) | (N+M+K+4+L)

If '__rel_loc' is used, this will be

 |header(N)|common(M)|fields(K)|__rel_loc(4)|fields(L)|data(G)|

where

 __rel_loc = (G << 16) | (L)

This case shows L bytes after the '__rel_loc' attribute  field,
if there is no fields after the __rel_loc field, L must be 0.

This is relatively easy (and no need to consider the kernel header
change) when the event data fields are composed by user who doesn't
know header and common fields.

Link: https://lkml.kernel.org/r/163757341258.510314.4214431827833229956.stgit@devnote2

Cc: Beau Belgrave <beaub@linux.microsoft.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 2d167ac3452c..3900404aa063 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -782,6 +782,7 @@ enum {
 	FILTER_OTHER = 0,
 	FILTER_STATIC_STRING,
 	FILTER_DYN_STRING,
+	FILTER_RDYN_STRING,
 	FILTER_PTR_STRING,
 	FILTER_TRACE_FN,
 	FILTER_COMM,
-- 
cgit v1.2.3


From 55de2c0b5610cba5a5a93c0788031133c457e689 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Mon, 22 Nov 2021 18:30:21 +0900
Subject: tracing: Add '__rel_loc' using trace event macros

Add '__rel_loc' using trace event macros. These macros are usually
not used in the kernel, except for testing purpose.
This also add "rel_" variant of macros for dynamic_array string,
and bitmask.

Link: https://lkml.kernel.org/r/163757342119.510314.816029622439099016.stgit@devnote2

Cc: Beau Belgrave <beaub@linux.microsoft.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/trace/bpf_probe.h    |  16 ++++++
 include/trace/perf.h         |  16 ++++++
 include/trace/trace_events.h | 120 ++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 150 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index a8e97f84b652..7660a7846586 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -21,6 +21,22 @@
 #undef __get_bitmask
 #define __get_bitmask(field) (char *)__get_dynamic_array(field)
 
+#undef __get_rel_dynamic_array
+#define __get_rel_dynamic_array(field)	\
+		((void *)(&__entry->__rel_loc_##field) +	\
+		 sizeof(__entry->__rel_loc_##field) +		\
+		 (__entry->__rel_loc_##field & 0xffff))
+
+#undef __get_rel_dynamic_array_len
+#define __get_rel_dynamic_array_len(field)	\
+		((__entry->__rel_loc_##field >> 16) & 0xffff)
+
+#undef __get_rel_str
+#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field))
+
+#undef __get_rel_bitmask
+#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field)
+
 #undef __perf_count
 #define __perf_count(c)	(c)
 
diff --git a/include/trace/perf.h b/include/trace/perf.h
index dbc6c74defc3..ea4405de175a 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -21,6 +21,22 @@
 #undef __get_bitmask
 #define __get_bitmask(field) (char *)__get_dynamic_array(field)
 
+#undef __get_rel_dynamic_array
+#define __get_rel_dynamic_array(field)	\
+		((void *)(&__entry->__rel_loc_##field) +	\
+		 sizeof(__entry->__rel_loc_##field) +		\
+		 (__entry->__rel_loc_##field & 0xffff))
+
+#undef __get_rel_dynamic_array_len
+#define __get_rel_dynamic_array_len(field)	\
+		((__entry->__rel_loc_##field >> 16) & 0xffff)
+
+#undef __get_rel_str
+#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field))
+
+#undef __get_rel_bitmask
+#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field)
+
 #undef __perf_count
 #define __perf_count(c)	(__count = (c))
 
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 08810a463880..8c6f7c433518 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -108,6 +108,18 @@ TRACE_MAKE_SYSTEM_STR();
 #undef __bitmask
 #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1)
 
+#undef __rel_dynamic_array
+#define __rel_dynamic_array(type, item, len) u32 __rel_loc_##item;
+
+#undef __rel_string
+#define __rel_string(item, src) __rel_dynamic_array(char, item, -1)
+
+#undef __rel_string_len
+#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1)
+
+#undef __rel_bitmask
+#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1)
+
 #undef TP_STRUCT__entry
 #define TP_STRUCT__entry(args...) args
 
@@ -200,11 +212,23 @@ TRACE_MAKE_SYSTEM_STR();
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+
 #undef __string_len
 #define __string_len(item, src, len) __dynamic_array(char, item, -1)
 
-#undef __bitmask
-#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+#undef __rel_dynamic_array
+#define __rel_dynamic_array(type, item, len)	u32 item;
+
+#undef __rel_string
+#define __rel_string(item, src) __rel_dynamic_array(char, item, -1)
+
+#undef __rel_string_len
+#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1)
+
+#undef __rel_bitmask
+#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1)
 
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
@@ -293,6 +317,19 @@ TRACE_MAKE_SYSTEM_STR();
 #undef __get_str
 #define __get_str(field) ((char *)__get_dynamic_array(field))
 
+#undef __get_rel_dynamic_array
+#define __get_rel_dynamic_array(field)	\
+		((void *)(&__entry->__rel_loc_##field) +	\
+		 sizeof(__entry->__rel_loc_##field) +		\
+		 (__entry->__rel_loc_##field & 0xffff))
+
+#undef __get_rel_dynamic_array_len
+#define __get_rel_dynamic_array_len(field)	\
+		((__entry->__rel_loc_##field >> 16) & 0xffff)
+
+#undef __get_rel_str
+#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field))
+
 #undef __get_bitmask
 #define __get_bitmask(field)						\
 	({								\
@@ -302,6 +339,15 @@ TRACE_MAKE_SYSTEM_STR();
 		trace_print_bitmask_seq(p, __bitmask, __bitmask_size);	\
 	})
 
+#undef __get_rel_bitmask
+#define __get_rel_bitmask(field)						\
+	({								\
+		void *__bitmask = __get_rel_dynamic_array(field);		\
+		unsigned int __bitmask_size;				\
+		__bitmask_size = __get_rel_dynamic_array_len(field);	\
+		trace_print_bitmask_seq(p, __bitmask, __bitmask_size);	\
+	})
+
 #undef __print_flags
 #define __print_flags(flag, delim, flag_array...)			\
 	({								\
@@ -471,6 +517,21 @@ static struct trace_event_functions trace_event_type_funcs_##call = {	\
 #undef __bitmask
 #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
 
+#undef __rel_dynamic_array
+#define __rel_dynamic_array(_type, _item, _len) {			\
+	.type = "__rel_loc " #_type "[]", .name = #_item,		\
+	.size = 4, .align = 4,						\
+	.is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER },
+
+#undef __rel_string
+#define __rel_string(item, src) __rel_dynamic_array(char, item, -1)
+
+#undef __rel_string_len
+#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1)
+
+#undef __rel_bitmask
+#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1)
+
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print)	\
 static struct trace_event_fields trace_event_fields_##call[] = {	\
@@ -519,6 +580,22 @@ static struct trace_event_fields trace_event_fields_##call[] = {	\
 #undef __string_len
 #define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1)
 
+#undef __rel_dynamic_array
+#define __rel_dynamic_array(type, item, len)				\
+	__item_length = (len) * sizeof(type);				\
+	__data_offsets->item = __data_size +				\
+			       offsetof(typeof(*entry), __data) -	\
+			       offsetof(typeof(*entry), __rel_loc_##item) -	\
+			       sizeof(u32);				\
+	__data_offsets->item |= __item_length << 16;			\
+	__data_size += __item_length;
+
+#undef __rel_string
+#define __rel_string(item, src) __rel_dynamic_array(char, item,			\
+		    strlen((src) ? (const char *)(src) : "(null)") + 1)
+
+#undef __rel_string_len
+#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, (len) + 1)
 /*
  * __bitmask_size_in_bytes_raw is the number of bytes needed to hold
  * num_possible_cpus().
@@ -542,6 +619,10 @@ static struct trace_event_fields trace_event_fields_##call[] = {	\
 #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item,	\
 					 __bitmask_size_in_longs(nr_bits))
 
+#undef __rel_bitmask
+#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item,	\
+					 __bitmask_size_in_longs(nr_bits))
+
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 static inline notrace int trace_event_get_offsets_##call(		\
@@ -706,6 +787,37 @@ static inline notrace int trace_event_get_offsets_##call(		\
 #define __assign_bitmask(dst, src, nr_bits)					\
 	memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits))
 
+#undef __rel_dynamic_array
+#define __rel_dynamic_array(type, item, len)				\
+	__entry->__rel_loc_##item = __data_offsets.item;
+
+#undef __rel_string
+#define __rel_string(item, src) __rel_dynamic_array(char, item, -1)
+
+#undef __rel_string_len
+#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1)
+
+#undef __assign_rel_str
+#define __assign_rel_str(dst, src)					\
+	strcpy(__get_rel_str(dst), (src) ? (const char *)(src) : "(null)");
+
+#undef __assign_rel_str_len
+#define __assign_rel_str_len(dst, src, len)				\
+	do {								\
+		memcpy(__get_rel_str(dst), (src), (len));		\
+		__get_rel_str(dst)[len] = '\0';				\
+	} while (0)
+
+#undef __rel_bitmask
+#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1)
+
+#undef __get_rel_bitmask
+#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field)
+
+#undef __assign_rel_bitmask
+#define __assign_rel_bitmask(dst, src, nr_bits)					\
+	memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits))
+
 #undef TP_fast_assign
 #define TP_fast_assign(args...) args
 
@@ -770,6 +882,10 @@ static inline void ftrace_test_probe_##call(void)			\
 #undef __get_dynamic_array_len
 #undef __get_str
 #undef __get_bitmask
+#undef __get_rel_dynamic_array
+#undef __get_rel_dynamic_array_len
+#undef __get_rel_str
+#undef __get_rel_bitmask
 #undef __print_array
 #undef __print_hex_dump
 
-- 
cgit v1.2.3


From 8c33915d77a565b8b5d44e6368e22b6ea300b7a8 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 28 Nov 2021 20:00:29 +0100
Subject: platform/x86: wmi: Add no_notify_data flag to struct wmi_driver

Some WMI implementations do notifies on WMI objects without a _WED method
allow WMI drivers to indicate that _WED should not be called for notifies
on the WMI objects the driver is bound to.

Instead the driver's notify callback will simply be called with a NULL
data argument.

Reported-by: Yauhen Kharuzhy <jekhor@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20211128190031.405620-3-hdegoede@redhat.com
---
 include/linux/wmi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/wmi.h b/include/linux/wmi.h
index 2cb3913c1f50..b88d7b58e61e 100644
--- a/include/linux/wmi.h
+++ b/include/linux/wmi.h
@@ -35,6 +35,7 @@ extern int set_required_buffer_size(struct wmi_device *wdev, u64 length);
 struct wmi_driver {
 	struct device_driver driver;
 	const struct wmi_device_id *id_table;
+	bool no_notify_data;
 
 	int (*probe)(struct wmi_device *wdev, const void *context);
 	void (*remove)(struct wmi_device *wdev);
-- 
cgit v1.2.3


From 4e66934eaadc83b27ada8d42b60894018f3bfabf Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:21:55 -0800
Subject: lib: add reference counting tracking infrastructure

It can be hard to track where references are taken and released.

In networking, we have annoying issues at device or netns dismantles,
and we had various proposals to ease root causing them.

This patch adds new infrastructure pairing refcount increases
and decreases. This will self document code, because programmers
will have to associate increments/decrements.

This is controled by CONFIG_REF_TRACKER which can be selected
by users of this feature.

This adds both cpu and memory costs, and thus should probably be
used with care.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ref_tracker.h | 73 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 include/linux/ref_tracker.h

(limited to 'include')

diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h
new file mode 100644
index 000000000000..c11c9db5825c
--- /dev/null
+++ b/include/linux/ref_tracker.h
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#ifndef _LINUX_REF_TRACKER_H
+#define _LINUX_REF_TRACKER_H
+#include <linux/refcount.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+
+struct ref_tracker;
+
+struct ref_tracker_dir {
+#ifdef CONFIG_REF_TRACKER
+	spinlock_t		lock;
+	unsigned int		quarantine_avail;
+	refcount_t		untracked;
+	struct list_head	list; /* List of active trackers */
+	struct list_head	quarantine; /* List of dead trackers */
+#endif
+};
+
+#ifdef CONFIG_REF_TRACKER
+static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
+					unsigned int quarantine_count)
+{
+	INIT_LIST_HEAD(&dir->list);
+	INIT_LIST_HEAD(&dir->quarantine);
+	spin_lock_init(&dir->lock);
+	dir->quarantine_avail = quarantine_count;
+	refcount_set(&dir->untracked, 1);
+}
+
+void ref_tracker_dir_exit(struct ref_tracker_dir *dir);
+
+void ref_tracker_dir_print(struct ref_tracker_dir *dir,
+			   unsigned int display_limit);
+
+int ref_tracker_alloc(struct ref_tracker_dir *dir,
+		      struct ref_tracker **trackerp, gfp_t gfp);
+
+int ref_tracker_free(struct ref_tracker_dir *dir,
+		     struct ref_tracker **trackerp);
+
+#else /* CONFIG_REF_TRACKER */
+
+static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
+					unsigned int quarantine_count)
+{
+}
+
+static inline void ref_tracker_dir_exit(struct ref_tracker_dir *dir)
+{
+}
+
+static inline void ref_tracker_dir_print(struct ref_tracker_dir *dir,
+					 unsigned int display_limit)
+{
+}
+
+static inline int ref_tracker_alloc(struct ref_tracker_dir *dir,
+				    struct ref_tracker **trackerp,
+				    gfp_t gfp)
+{
+	return 0;
+}
+
+static inline int ref_tracker_free(struct ref_tracker_dir *dir,
+				   struct ref_tracker **trackerp)
+{
+	return 0;
+}
+
+#endif
+
+#endif /* _LINUX_REF_TRACKER_H */
-- 
cgit v1.2.3


From 4d92b95ff2f95f13df9bad0b5a25a9f60e72758d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:21:57 -0800
Subject: net: add net device refcount tracker infrastructure

net device are refcounted. Over the years we had numerous bugs
caused by imbalanced dev_hold() and dev_put() calls.

The general idea is to be able to precisely pair each decrement with
a corresponding prior increment. Both share a cookie, basically
a pointer to private data storing stack traces.

This patch adds dev_hold_track() and dev_put_track().

To use these helpers, each data structure owning a refcount
should also use a "netdevice_tracker" to pair the hold and put.

netdevice_tracker dev_tracker;
...
dev_hold_track(dev, &dev_tracker, GFP_ATOMIC);
...
dev_put_track(dev, &dev_tracker);

Whenever a leak happens, we will get precise stack traces
of the point dev_hold_track() happened, at device dismantle phase.

We will also get a stack trace if too many dev_put_track() for the same
netdevice_tracker are attempted.

This is guarded by CONFIG_NET_DEV_REFCNT_TRACKER option.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 65117f01d5f2..143d60ed0047 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -48,6 +48,7 @@
 #include <uapi/linux/pkt_cls.h>
 #include <linux/hashtable.h>
 #include <linux/rbtree.h>
+#include <linux/ref_tracker.h>
 
 struct netpoll_info;
 struct device;
@@ -300,6 +301,12 @@ enum netdev_state_t {
 };
 
 
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+typedef struct ref_tracker *netdevice_tracker;
+#else
+typedef struct {} netdevice_tracker;
+#endif
+
 struct gro_list {
 	struct list_head	list;
 	int			count;
@@ -1865,6 +1872,7 @@ enum netdev_ml_priv_type {
  *	@proto_down_reason:	reason a netdev interface is held down
  *	@pcpu_refcnt:		Number of references to this device
  *	@dev_refcnt:		Number of references to this device
+ *	@refcnt_tracker:	Tracker directory for tracked references to this device
  *	@todo_list:		Delayed register/unregister
  *	@link_watch_list:	XXX: need comments on this one
  *
@@ -2178,6 +2186,7 @@ struct net_device {
 #else
 	refcount_t		dev_refcnt;
 #endif
+	struct ref_tracker_dir	refcnt_tracker;
 
 	struct list_head	link_watch_list;
 
@@ -3805,6 +3814,7 @@ void netdev_run_todo(void);
  *	@dev: network device
  *
  * Release reference to device to allow it to be freed.
+ * Try using dev_put_track() instead.
  */
 static inline void dev_put(struct net_device *dev)
 {
@@ -3822,6 +3832,7 @@ static inline void dev_put(struct net_device *dev)
  *	@dev: network device
  *
  * Hold reference to device to keep it from being freed.
+ * Try using dev_hold_track() instead.
  */
 static inline void dev_hold(struct net_device *dev)
 {
@@ -3834,6 +3845,40 @@ static inline void dev_hold(struct net_device *dev)
 	}
 }
 
+static inline void netdev_tracker_alloc(struct net_device *dev,
+					netdevice_tracker *tracker, gfp_t gfp)
+{
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+	ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp);
+#endif
+}
+
+static inline void netdev_tracker_free(struct net_device *dev,
+				       netdevice_tracker *tracker)
+{
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+	ref_tracker_free(&dev->refcnt_tracker, tracker);
+#endif
+}
+
+static inline void dev_hold_track(struct net_device *dev,
+				  netdevice_tracker *tracker, gfp_t gfp)
+{
+	if (dev) {
+		dev_hold(dev);
+		netdev_tracker_alloc(dev, tracker, gfp);
+	}
+}
+
+static inline void dev_put_track(struct net_device *dev,
+				 netdevice_tracker *tracker)
+{
+	if (dev) {
+		netdev_tracker_free(dev, tracker);
+		dev_put(dev);
+	}
+}
+
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
  * and _off may be called from IRQ context, but it is caller
  * who is responsible for serialization of these calls.
-- 
cgit v1.2.3


From 80e8921b2b72c300ca56a01729004d30bedb82cd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:21:58 -0800
Subject: net: add net device refcount tracker to struct netdev_rx_queue

This helps debugging net device refcount leaks.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 143d60ed0047..3d691fadd569 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -741,6 +741,8 @@ struct netdev_rx_queue {
 #endif
 	struct kobject			kobj;
 	struct net_device		*dev;
+	netdevice_tracker		dev_tracker;
+
 #ifdef CONFIG_XDP_SOCKETS
 	struct xsk_buff_pool            *pool;
 #endif
-- 
cgit v1.2.3


From 0b688f24b7d611db3a02f3d4ab562d049c78a17d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:21:59 -0800
Subject: net: add net device refcount tracker to struct netdev_queue

This will help debugging pesky netdev reference leaks.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3d691fadd569..b4f704337f65 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -586,6 +586,8 @@ struct netdev_queue {
  * read-mostly part
  */
 	struct net_device	*dev;
+	netdevice_tracker	dev_tracker;
+
 	struct Qdisc __rcu	*qdisc;
 	struct Qdisc		*qdisc_sleeping;
 #ifdef CONFIG_SYSFS
-- 
cgit v1.2.3


From 4dbd24f65c60259ce5d1563433ecaf5fab693c83 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:22:02 -0800
Subject: drop_monitor: add net device refcount tracker

We want to track all dev_hold()/dev_put() to ease leak hunting.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/devlink.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 043fcec8b0aa..3276a29f2b81 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -664,13 +664,17 @@ struct devlink_health_reporter_ops {
  * @trap_name: Trap name.
  * @trap_group_name: Trap group name.
  * @input_dev: Input netdevice.
+ * @dev_tracker: refcount tracker for @input_dev.
  * @fa_cookie: Flow action user cookie.
  * @trap_type: Trap type.
  */
 struct devlink_trap_metadata {
 	const char *trap_name;
 	const char *trap_group_name;
+
 	struct net_device *input_dev;
+	netdevice_tracker dev_tracker;
+
 	const struct flow_action_cookie *fa_cookie;
 	enum devlink_trap_type trap_type;
 };
-- 
cgit v1.2.3


From 9038c320001dd07f60736018edf608ac5baca0ab Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:22:03 -0800
Subject: net: dst: add net device refcount tracking to dst_entry

We want to track all dev_hold()/dev_put() to ease leak hunting.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 17 +++++++++++++++++
 include/net/dst.h         |  1 +
 2 files changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b4f704337f65..afed3b10491b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3883,6 +3883,23 @@ static inline void dev_put_track(struct net_device *dev,
 	}
 }
 
+static inline void dev_replace_track(struct net_device *odev,
+				     struct net_device *ndev,
+				     netdevice_tracker *tracker,
+				     gfp_t gfp)
+{
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+	if (odev)
+		ref_tracker_free(&odev->refcnt_tracker, tracker);
+#endif
+	dev_hold(ndev);
+	dev_put(odev);
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+	if (ndev)
+		ref_tracker_alloc(&ndev->refcnt_tracker, tracker, gfp);
+#endif
+}
+
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
  * and _off may be called from IRQ context, but it is caller
  * who is responsible for serialization of these calls.
diff --git a/include/net/dst.h b/include/net/dst.h
index a057319aabef..6aa252c3fc55 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -77,6 +77,7 @@ struct dst_entry {
 #ifndef CONFIG_64BIT
 	atomic_t		__refcnt;	/* 32-bit offset 64 */
 #endif
+	netdevice_tracker	dev_tracker;
 };
 
 struct dst_metrics {
-- 
cgit v1.2.3


From c0fd407a0666a583a765cfb129c4dc492590ca89 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:22:05 -0800
Subject: sit: add net device refcount tracking to ip_tunnel

Note that other ip_tunnel users do not seem to hold a reference
on tunnel->dev. Probably needs some investigations.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip_tunnels.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index bc3b13ec93c9..0219fe907b26 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -104,7 +104,10 @@ struct metadata_dst;
 struct ip_tunnel {
 	struct ip_tunnel __rcu	*next;
 	struct hlist_node hash_node;
+
 	struct net_device	*dev;
+	netdevice_tracker	dev_tracker;
+
 	struct net		*net;	/* netns for packet i/o */
 
 	unsigned long	err_time;	/* Time when the last ICMP error
-- 
cgit v1.2.3


From 56c1c77948ba3576df1c387cefcf3bab93600822 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:22:06 -0800
Subject: ipv6: add net device refcount tracker to struct ip6_tnl

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip6_tunnel.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 028eaea1c854..a38c4f1e4e5c 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -46,6 +46,7 @@ struct __ip6_tnl_parm {
 struct ip6_tnl {
 	struct ip6_tnl __rcu *next;	/* next tunnel in list */
 	struct net_device *dev;	/* virtual device associated with tunnel */
+	netdevice_tracker dev_tracker;
 	struct net *net;	/* netns for packet i/o */
 	struct __ip6_tnl_parm parms;	/* tunnel configuration parameters */
 	struct flowi fl;	/* flowi template for xmit */
-- 
cgit v1.2.3


From 85662c9f8cbd4c96088ff99f56bc3d1097d0ac07 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:22:07 -0800
Subject: net: add net device refcount tracker to struct neighbour

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/neighbour.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 55af812c3ed5..190b07fe089e 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -158,6 +158,7 @@ struct neighbour {
 	struct list_head	managed_list;
 	struct rcu_head		rcu;
 	struct net_device	*dev;
+	netdevice_tracker	dev_tracker;
 	u8			primary_key[0];
 } __randomize_layout;
 
-- 
cgit v1.2.3


From 77a23b1f954381d7999ce069d3fc8658eb6a9bbc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:22:08 -0800
Subject: net: add net device refcount tracker to struct pneigh_entry

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/neighbour.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 190b07fe089e..5fffb783670a 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -174,6 +174,7 @@ struct pneigh_entry {
 	struct pneigh_entry	*next;
 	possible_net_t		net;
 	struct net_device	*dev;
+	netdevice_tracker	dev_tracker;
 	u32			flags;
 	u8			protocol;
 	u8			key[];
-- 
cgit v1.2.3


From 08d622568e5a58adebc8cb801599d3f181a6b687 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:22:09 -0800
Subject: net: add net device refcount tracker to struct neigh_parms

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/neighbour.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 5fffb783670a..937389e04c8e 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -70,6 +70,7 @@ enum {
 struct neigh_parms {
 	possible_net_t net;
 	struct net_device *dev;
+	netdevice_tracker dev_tracker;
 	struct list_head list;
 	int	(*neigh_setup)(struct neighbour *);
 	struct neigh_table *tbl;
-- 
cgit v1.2.3


From 8c727003c4d0c776bd286d65c347591734d1d841 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:22:11 -0800
Subject: ipv6: add net device refcount tracker to struct inet6_dev

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/if_inet6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 653e7d0f65cb..f026cf08a8e8 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -160,6 +160,7 @@ struct ipv6_devstat {
 
 struct inet6_dev {
 	struct net_device	*dev;
+	netdevice_tracker	dev_tracker;
 
 	struct list_head	addr_list;
 
-- 
cgit v1.2.3


From c04438f58d140723e58050fcb9d33d84cb39e9e9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:22:12 -0800
Subject: ipv4: add net device refcount tracker to struct in_device

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/inetdevice.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 518b484a7f07..674aeead6260 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -24,6 +24,8 @@ struct ipv4_devconf {
 
 struct in_device {
 	struct net_device	*dev;
+	netdevice_tracker	dev_tracker;
+
 	refcount_t		refcnt;
 	int			dead;
 	struct in_ifaddr	__rcu *ifa_list;/* IP ifaddr chain		*/
-- 
cgit v1.2.3


From 606509f27f67748b92f783a75a6e39cfaa2fe92d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:22:13 -0800
Subject: net/sched: add net device refcount tracker to struct Qdisc

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sch_generic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 22179b2fda72..dbf202bb1a0e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -125,7 +125,7 @@ struct Qdisc {
 	spinlock_t		seqlock;
 
 	struct rcu_head		rcu;
-
+	netdevice_tracker	dev_tracker;
 	/* private data */
 	long privdata[] ____cacheline_aligned;
 };
-- 
cgit v1.2.3


From 63f13937cbe9b00982dfc8e578b1aec8e5037333 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:22:14 -0800
Subject: net: linkwatch: add net device refcount tracker

Add a netdevice_tracker inside struct net_device, to track
the self reference when a device is in lweventlist.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index afed3b10491b..69dca1edd5a6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1950,6 +1950,7 @@ enum netdev_ml_priv_type {
  *			keep a list of interfaces to be deleted.
  *
  *	@dev_addr_shadow:	Copy of @dev_addr to catch direct writes.
+ *	@linkwatch_dev_tracker:	refcount tracker used by linkwatch.
  *
  *	FIXME: cleanup struct net_device such that network protocol info
  *	moves out.
@@ -2280,6 +2281,7 @@ struct net_device {
 	struct bpf_xdp_entity	xdp_state[__MAX_XDP_MODE];
 
 	u8 dev_addr_shadow[MAX_ADDR_LEN];
+	netdevice_tracker	linkwatch_dev_tracker;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
-- 
cgit v1.2.3


From 095e200f175f9843642343a4a48087fcfa4d3751 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:22:15 -0800
Subject: net: failover: add net device refcount tracker

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/failover.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/failover.h b/include/net/failover.h
index bb15438f39c7..f2b42b4b9cd6 100644
--- a/include/net/failover.h
+++ b/include/net/failover.h
@@ -25,6 +25,7 @@ struct failover_ops {
 struct failover {
 	struct list_head list;
 	struct net_device __rcu *failover_dev;
+	netdevice_tracker	dev_tracker;
 	struct failover_ops __rcu *ops;
 };
 
-- 
cgit v1.2.3


From 42120a86438379eb77424831ae3d696c2d5cb622 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:22:16 -0800
Subject: ipmr, ip6mr: add net device refcount tracker to struct vif_device

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mroute_base.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index 8071148f29a6..e05ee9f001ff 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -12,6 +12,7 @@
 /**
  * struct vif_device - interface representor for multicast routing
  * @dev: network device being used
+ * @dev_tracker: refcount tracker for @dev reference
  * @bytes_in: statistic; bytes ingressing
  * @bytes_out: statistic; bytes egresing
  * @pkt_in: statistic; packets ingressing
@@ -26,6 +27,7 @@
  */
 struct vif_device {
 	struct net_device *dev;
+	netdevice_tracker dev_tracker;
 	unsigned long bytes_in, bytes_out;
 	unsigned long pkt_in, pkt_out;
 	unsigned long rate_limit;
-- 
cgit v1.2.3


From 5fa5ae605821e0e10ee489d9a6e331fd287ccc57 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Dec 2021 20:22:17 -0800
Subject: netpoll: add net device refcount tracker to struct netpoll

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netpoll.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index e6a2d72e0dc7..bd19c4b91e31 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -24,6 +24,7 @@ union inet_addr {
 
 struct netpoll {
 	struct net_device *dev;
+	netdevice_tracker dev_tracker;
 	char dev_name[IFNAMSIZ];
 	const char *name;
 
-- 
cgit v1.2.3


From 45cac6754529ae17345d8f5b632d9e602a091a20 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 3 Dec 2021 20:53:56 -0800
Subject: net: fix recent csum changes

Vladimir reported csum issues after my recent change in skb_postpull_rcsum()

Issue here is the following:

initial skb->csum is the csum of

[part to be pulled][rest of packet]

Old code:
 skb->csum = csum_sub(skb->csum, csum_partial(pull, pull_length, 0));

New code:
 skb->csum = ~csum_partial(pull, pull_length, ~skb->csum);

This is broken if the csum of [pulled part]
happens to be equal to skb->csum, because end
result of skb->csum is 0 in new code, instead
of being 0xffffffff

David Laight suggested to use

skb->csum = -csum_partial(pull, pull_length, -skb->csum);

I based my patches on existing code present in include/net/seg6.h,
update_csum_diff4() and update_csum_diff16() which might need
a similar fix.

I guess that my tests, mostly pulling 40 bytes of IPv6 header
were not providing enough entropy to hit this bug.

v2: added wsum_negate() to make sparse happy.

Fixes: 29c3002644bd ("net: optimize skb_postpull_rcsum()")
Fixes: 0bd28476f636 ("gro: optimize skb_gro_postpull_rcsum()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Suggested-by: David Laight <David.Laight@ACULAB.COM>
Cc: David Lebrun <dlebrun@google.com>
Tested-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20211204045356.3659278-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 3 ++-
 include/net/checksum.h | 4 ++++
 include/net/gro.h      | 4 ++--
 3 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index eae4bd3237a4..dd262bd8ddbe 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3486,7 +3486,8 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb,
 				      const void *start, unsigned int len)
 {
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
-		skb->csum = ~csum_partial(start, len, ~skb->csum);
+		skb->csum = wsum_negate(csum_partial(start, len,
+						     wsum_negate(skb->csum)));
 	else if (skb->ip_summed == CHECKSUM_PARTIAL &&
 		 skb_checksum_start_offset(skb) < 0)
 		skb->ip_summed = CHECKSUM_NONE;
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 5b96d5bd6e54..5218041e5c8f 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -180,4 +180,8 @@ static inline void remcsum_unadjust(__sum16 *psum, __wsum delta)
 	*psum = csum_fold(csum_sub(delta, (__force __wsum)*psum));
 }
 
+static inline __wsum wsum_negate(__wsum val)
+{
+	return (__force __wsum)-((__force u32)val);
+}
 #endif
diff --git a/include/net/gro.h b/include/net/gro.h
index b1139fca7c43..8f75802d50fd 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -173,8 +173,8 @@ static inline void skb_gro_postpull_rcsum(struct sk_buff *skb,
 					const void *start, unsigned int len)
 {
 	if (NAPI_GRO_CB(skb)->csum_valid)
-		NAPI_GRO_CB(skb)->csum = ~csum_partial(start, len,
-						       ~NAPI_GRO_CB(skb)->csum);
+		NAPI_GRO_CB(skb)->csum = wsum_negate(csum_partial(start, len,
+						wsum_negate(NAPI_GRO_CB(skb)->csum)));
 }
 
 /* GRO checksum functions. These are logical equivalents of the normal
-- 
cgit v1.2.3


From f0e6e6fa41b3d2aa1dcb61dd4ed6d7be004bb5a8 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon, 18 Oct 2021 17:14:07 +0200
Subject: KVM: Drop stale kvm_is_transparent_hugepage() declaration

kvm_is_transparent_hugepage() was removed in commit 205d76ff0684 ("KVM:
Remove kvm_is_transparent_hugepage() and PageTransCompoundMap()") but its
declaration in include/linux/kvm_host.h persisted. Drop it.

Fixes: 205d76ff0684 (""KVM: Remove kvm_is_transparent_hugepage() and PageTransCompoundMap()")
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20211018151407.2107363-1-vkuznets@redhat.com
---
 include/linux/kvm_host.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c310648cc8f1..6d138adc78af 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1174,7 +1174,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 
 bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
 bool kvm_is_zone_device_pfn(kvm_pfn_t pfn);
-bool kvm_is_transparent_hugepage(kvm_pfn_t pfn);
 
 struct kvm_irq_ack_notifier {
 	struct hlist_node link;
-- 
cgit v1.2.3


From a9e6107616bb8108aa4fc22584a05e69761a91f7 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Wed, 1 Dec 2021 13:41:26 +0100
Subject: media: cec: fix a deadlock situation

The cec_devnode struct has a lock meant to serialize access
to the fields of this struct. This lock is taken during
device node (un)registration and when opening or releasing a
filehandle to the device node. When the last open filehandle
is closed the cec adapter might be disabled by calling the
adap_enable driver callback with the devnode.lock held.

However, if during that callback a message or event arrives
then the driver will call one of the cec_queue_event()
variants in cec-adap.c, and those will take the same devnode.lock
to walk the open filehandle list.

This obviously causes a deadlock.

This is quite easy to reproduce with the cec-gpio driver since that
uses the cec-pin framework which generated lots of events and uses
a kernel thread for the processing, so when adap_enable is called
the thread is still running and can generate events.

But I suspect that it might also happen with other drivers if an
interrupt arrives signaling e.g. a received message before adap_enable
had a chance to disable the interrupts.

This patch adds a new mutex to serialize access to the fhs list.
When adap_enable() is called the devnode.lock mutex is held, but
not devnode.lock_fhs. The event functions in cec-adap.c will now
use devnode.lock_fhs instead of devnode.lock, ensuring that it is
safe to call those functions from the adap_enable callback.

This specific issue only happens if the last open filehandle is closed
and the physical address is invalid. This is not something that
happens during normal operation, but it does happen when monitoring
CEC traffic (e.g. cec-ctl --monitor) with an unconfigured CEC adapter.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Cc: <stable@vger.kernel.org>  # for v5.13 and up
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/media/cec.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/media/cec.h b/include/media/cec.h
index 208c9613c07e..77346f757036 100644
--- a/include/media/cec.h
+++ b/include/media/cec.h
@@ -26,13 +26,17 @@
  * @dev:	cec device
  * @cdev:	cec character device
  * @minor:	device node minor number
+ * @lock:	lock to serialize open/release and registration
  * @registered:	the device was correctly registered
  * @unregistered: the device was unregistered
+ * @lock_fhs:	lock to control access to @fhs
  * @fhs:	the list of open filehandles (cec_fh)
- * @lock:	lock to control access to this structure
  *
  * This structure represents a cec-related device node.
  *
+ * To add or remove filehandles from @fhs the @lock must be taken first,
+ * followed by @lock_fhs. It is safe to access @fhs if either lock is held.
+ *
  * The @parent is a physical device. It must be set by core or device drivers
  * before registering the node.
  */
@@ -43,10 +47,13 @@ struct cec_devnode {
 
 	/* device info */
 	int minor;
+	/* serialize open/release and registration */
+	struct mutex lock;
 	bool registered;
 	bool unregistered;
+	/* protect access to fhs */
+	struct mutex lock_fhs;
 	struct list_head fhs;
-	struct mutex lock;
 };
 
 struct cec_adapter;
-- 
cgit v1.2.3


From ee1806beff85d4f1a3d7f22aa1bcdc8ffb59b36c Mon Sep 17 00:00:00 2001
From: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Date: Wed, 1 Dec 2021 23:56:51 +0100
Subject: media: videobuf2: add WARN_ON_ONCE if bytesused is bigger than buffer
 length

In function vb2_set_plane_payload, report if the given bytesused is
bigger than the buffer size, and clamp it to the buffer size.

Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/media/videobuf2-core.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index 2467284e5f26..5468b633b9d2 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -1155,8 +1155,15 @@ static inline void *vb2_get_drv_priv(struct vb2_queue *q)
 static inline void vb2_set_plane_payload(struct vb2_buffer *vb,
 				 unsigned int plane_no, unsigned long size)
 {
-	if (plane_no < vb->num_planes)
+	/*
+	 * size must never be larger than the buffer length, so
+	 * warn and clamp to the buffer length if that's the case.
+	 */
+	if (plane_no < vb->num_planes) {
+		if (WARN_ON_ONCE(size > vb->planes[plane_no].length))
+			size = vb->planes[plane_no].length;
 		vb->planes[plane_no].bytesused = size;
+	}
 }
 
 /**
-- 
cgit v1.2.3


From 77993b595ada5731e513eb06a0f4bf4b9f1e9532 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 29 Nov 2021 18:46:54 +0100
Subject: locking: Allow to include asm/spinlock_types.h from
 linux/spinlock_types_raw.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The printk header file includes ratelimit_types.h for its __ratelimit()
based usage. It is required for the static initializer used in
printk_ratelimited(). It uses a raw_spinlock_t and includes the
spinlock_types.h.

PREEMPT_RT substitutes spinlock_t with a rtmutex based implementation and so
its spinlock_t implmentation (provided by spinlock_rt.h) includes rtmutex.h and
atomic.h which leads to recursive includes where defines are missing.

By including only the raw_spinlock_t defines it avoids the atomic.h
related includes at this stage.

An example on powerpc:

|  CALL    scripts/atomic/check-atomics.sh
|In file included from include/linux/bug.h:5,
|                 from include/linux/page-flags.h:10,
|                 from kernel/bounds.c:10:
|arch/powerpc/include/asm/page_32.h: In function âclear_pageâ:
|arch/powerpc/include/asm/bug.h:87:4: error: implicit declaration of function â=80=98__WARNâ=80=99 [-Werror=3Dimplicit-function-declaration]
|   87 |    __WARN();    \
|      |    ^~~~~~
|arch/powerpc/include/asm/page_32.h:48:2: note: in expansion of macro âWARN_ONâ=99
|   48 |  WARN_ON((unsigned long)addr & (L1_CACHE_BYTES - 1));
|      |  ^~~~~~~
|arch/powerpc/include/asm/bug.h:58:17: error: invalid application of âsizeofâ=99 to incomplete type âstruct bug_entryâ=99
|   58 |     "i" (sizeof(struct bug_entry)), \
|      |                 ^~~~~~
|arch/powerpc/include/asm/bug.h:89:3: note: in expansion of macro âBUG_ENTRYâ=99
|   89 |   BUG_ENTRY(PPC_TLNEI " %4, 0",   \
|      |   ^~~~~~~~~
|arch/powerpc/include/asm/page_32.h:48:2: note: in expansion of macro âWARN_ONâ=99
|   48 |  WARN_ON((unsigned long)addr & (L1_CACHE_BYTES - 1));
|      |  ^~~~~~~
|In file included from arch/powerpc/include/asm/ptrace.h:298,
|                 from arch/powerpc/include/asm/hw_irq.h:12,
|                 from arch/powerpc/include/asm/irqflags.h:12,
|                 from include/linux/irqflags.h:16,
|                 from include/asm-generic/cmpxchg-local.h:6,
|                 from arch/powerpc/include/asm/cmpxchg.h:526,
|                 from arch/powerpc/include/asm/atomic.h:11,
|                 from include/linux/atomic.h:7,
|                 from include/linux/rwbase_rt.h:6,
|                 from include/linux/rwlock_types.h:55,
|                 from include/linux/spinlock_types.h:74,
|                 from include/linux/ratelimit_types.h:7,
|                 from include/linux/printk.h:10,
|                 from include/asm-generic/bug.h:22,
|                 from arch/powerpc/include/asm/bug.h:109,
|                 from include/linux/bug.h:5,
|                 from include/linux/page-flags.h:10,
|                 from kernel/bounds.c:10:
|include/linux/thread_info.h: In function â=80=98copy_overflowâ=80=99:
|include/linux/thread_info.h:210:2: error: implicit declaration of function â=80=98WARNâ=80=99 [-Werror=3Dimplicit-function-declaration]
|  210 |  WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
|      |  ^~~~

The WARN / BUG include pulls in printk.h and then ptrace.h expects WARN
(from bug.h) which is not yet complete. Even hw_irq.h has WARN_ON()
statements.

On POWERPC64 there are missing atomic64 defines while building 32bit
VDSO:
|  VDSO32C arch/powerpc/kernel/vdso32/vgettimeofday.o
|In file included from include/linux/atomic.h:80,
|                 from include/linux/rwbase_rt.h:6,
|                 from include/linux/rwlock_types.h:55,
|                 from include/linux/spinlock_types.h:74,
|                 from include/linux/ratelimit_types.h:7,
|                 from include/linux/printk.h:10,
|                 from include/linux/kernel.h:19,
|                 from arch/powerpc/include/asm/page.h:11,
|                 from arch/powerpc/include/asm/vdso/gettimeofday.h:5,
|                 from include/vdso/datapage.h:137,
|                 from lib/vdso/gettimeofday.c:5,
|                 from <command-line>:
|include/linux/atomic-arch-fallback.h: In function âarch_atomic64_incâ=99:
|include/linux/atomic-arch-fallback.h:1447:2: error: implicit declaration of function âarch_atomic64_addâ; did you mean âarch_atomic_addâ? [-Werror=3Dimpl
|icit-function-declaration]
| 1447 |  arch_atomic64_add(1, v);
|      |  ^~~~~~~~~~~~~~~~~
|      |  arch_atomic_add

The generic fallback is not included, atomics itself are not used. If
kernel.h does not include printk.h then it comes later from the bug.h
include.

Allow asm/spinlock_types.h to be included from
linux/spinlock_types_raw.h.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20211129174654.668506-12-bigeasy@linutronix.de
---
 include/linux/ratelimit_types.h   | 2 +-
 include/linux/spinlock_types_up.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h
index b676aa419eef..c21c7f8103e2 100644
--- a/include/linux/ratelimit_types.h
+++ b/include/linux/ratelimit_types.h
@@ -4,7 +4,7 @@
 
 #include <linux/bits.h>
 #include <linux/param.h>
-#include <linux/spinlock_types.h>
+#include <linux/spinlock_types_raw.h>
 
 #define DEFAULT_RATELIMIT_INTERVAL	(5 * HZ)
 #define DEFAULT_RATELIMIT_BURST		10
diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h
index c09b6407ae1b..7f86a2016ac5 100644
--- a/include/linux/spinlock_types_up.h
+++ b/include/linux/spinlock_types_up.h
@@ -1,7 +1,7 @@
 #ifndef __LINUX_SPINLOCK_TYPES_UP_H
 #define __LINUX_SPINLOCK_TYPES_UP_H
 
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
 # error "please don't include this file directly"
 #endif
 
-- 
cgit v1.2.3


From 8d9f738f16a3ee9f2341578873c542ddd9802fe4 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Tue, 7 Dec 2021 20:32:30 +0800
Subject: regulator: fix bullet lists of regulator_ops comment

Since 89a6a5e56c82("regulator: add property parsing and callbacks to set protection limits")
which introduced a warning:

Documentation/driver-api/regulator:166: ./include/linux/regulator/driver.h:96: WARNING: Unexpected indentation.
Documentation/driver-api/regulator:166: ./include/linux/regulator/driver.h:98: WARNING: Block quote ends without a blank line; unexpected unindent.

Let's fix them.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/20211207123230.2262047-1-siyanteng@loongson.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 4078c7776453..720684995a77 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -90,15 +90,19 @@ enum regulator_detection_severity {
  * @set_over_current_protection: Support enabling of and setting limits for over
  *	current situation detection. Detection can be configured for three
  *	levels of severity.
- *	REGULATOR_SEVERITY_PROT should automatically shut down the regulator(s).
- *	REGULATOR_SEVERITY_ERR should indicate that over-current situation is
- *		caused by an unrecoverable error but HW does not perform
- *		automatic shut down.
- *	REGULATOR_SEVERITY_WARN should indicate situation where hardware is
- *		still believed to not be damaged but that a board sepcific
- *		recovery action is needed. If lim_uA is 0 the limit should not
- *		be changed but the detection should just be enabled/disabled as
- *		is requested.
+ *
+ *	- REGULATOR_SEVERITY_PROT should automatically shut down the regulator(s).
+ *
+ *	- REGULATOR_SEVERITY_ERR should indicate that over-current situation is
+ *		  caused by an unrecoverable error but HW does not perform
+ *		  automatic shut down.
+ *
+ *	- REGULATOR_SEVERITY_WARN should indicate situation where hardware is
+ *		  still believed to not be damaged but that a board sepcific
+ *		  recovery action is needed. If lim_uA is 0 the limit should not
+ *		  be changed but the detection should just be enabled/disabled as
+ *		  is requested.
+ *
  * @set_over_voltage_protection: Support enabling of and setting limits for over
  *	voltage situation detection. Detection can be configured for same
  *	severities as over current protection. Units of uV.
-- 
cgit v1.2.3


From 500daa0e6be292edcf635ba6b090b89da80e90a8 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Wed, 1 Dec 2021 16:32:56 +0900
Subject: dt-bindings: power: Add r8a779f0 SYSC power domain definitions

Add power domain indices for R-Car S4-8 (r8a779f0).

Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Link: https://lore.kernel.org/r/20211201073308.1003945-3-yoshihiro.shimoda.uh@renesas.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 include/dt-bindings/power/r8a779f0-sysc.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 include/dt-bindings/power/r8a779f0-sysc.h

(limited to 'include')

diff --git a/include/dt-bindings/power/r8a779f0-sysc.h b/include/dt-bindings/power/r8a779f0-sysc.h
new file mode 100644
index 000000000000..0ec8ad727ed9
--- /dev/null
+++ b/include/dt-bindings/power/r8a779f0-sysc.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: (GPL-2.0 or MIT) */
+/*
+ * Copyright (C) 2021 Renesas Electronics Corp.
+ */
+#ifndef __DT_BINDINGS_POWER_R8A779F0_SYSC_H__
+#define __DT_BINDINGS_POWER_R8A779F0_SYSC_H__
+
+/*
+ * These power domain indices match the Power Domain Register Numbers (PDR)
+ */
+
+#define R8A779F0_PD_A1E0D0C0		0
+#define R8A779F0_PD_A1E0D0C1		1
+#define R8A779F0_PD_A1E0D1C0		2
+#define R8A779F0_PD_A1E0D1C1		3
+#define R8A779F0_PD_A1E1D0C0		4
+#define R8A779F0_PD_A1E1D0C1		5
+#define R8A779F0_PD_A1E1D1C0		6
+#define R8A779F0_PD_A1E1D1C1		7
+#define R8A779F0_PD_A2E0D0		16
+#define R8A779F0_PD_A2E0D1		17
+#define R8A779F0_PD_A2E1D0		18
+#define R8A779F0_PD_A2E1D1		19
+#define R8A779F0_PD_A3E0		20
+#define R8A779F0_PD_A3E1		21
+
+/* Always-on power area */
+#define R8A779F0_PD_ALWAYS_ON		64
+
+#endif /* __DT_BINDINGS_POWER_R8A779A0_SYSC_H__*/
-- 
cgit v1.2.3


From 81c1655823237eaec74b2c175ae8e13adfccdaf7 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Wed, 1 Dec 2021 16:32:57 +0900
Subject: dt-bindings: clock: Add r8a779f0 CPG Core Clock Definitions

Add all Clock Pulse Generator Core Clock Outputs for the Renesas
R-Car S4-8 (R8A779F0) SoC.

Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Link: https://lore.kernel.org/r/20211201073308.1003945-4-yoshihiro.shimoda.uh@renesas.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 include/dt-bindings/clock/r8a779f0-cpg-mssr.h | 64 +++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 include/dt-bindings/clock/r8a779f0-cpg-mssr.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/r8a779f0-cpg-mssr.h b/include/dt-bindings/clock/r8a779f0-cpg-mssr.h
new file mode 100644
index 000000000000..f2ae1c6a82dd
--- /dev/null
+++ b/include/dt-bindings/clock/r8a779f0-cpg-mssr.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: (GPL-2.0 or MIT) */
+/*
+ * Copyright (C) 2021 Renesas Electronics Corp.
+ */
+#ifndef __DT_BINDINGS_CLOCK_R8A779F0_CPG_MSSR_H__
+#define __DT_BINDINGS_CLOCK_R8A779F0_CPG_MSSR_H__
+
+#include <dt-bindings/clock/renesas-cpg-mssr.h>
+
+/* r8a779f0 CPG Core Clocks */
+
+#define R8A779F0_CLK_ZX			0
+#define R8A779F0_CLK_ZS			1
+#define R8A779F0_CLK_ZT			2
+#define R8A779F0_CLK_ZTR		3
+#define R8A779F0_CLK_S0D2		4
+#define R8A779F0_CLK_S0D3		5
+#define R8A779F0_CLK_S0D4		6
+#define R8A779F0_CLK_S0D2_MM		7
+#define R8A779F0_CLK_S0D3_MM		8
+#define R8A779F0_CLK_S0D4_MM		9
+#define R8A779F0_CLK_S0D2_RT		10
+#define R8A779F0_CLK_S0D3_RT		11
+#define R8A779F0_CLK_S0D4_RT		12
+#define R8A779F0_CLK_S0D6_RT		13
+#define R8A779F0_CLK_S0D3_PER		14
+#define R8A779F0_CLK_S0D6_PER		15
+#define R8A779F0_CLK_S0D12_PER		16
+#define R8A779F0_CLK_S0D24_PER		17
+#define R8A779F0_CLK_S0D2_HSC		18
+#define R8A779F0_CLK_S0D3_HSC		19
+#define R8A779F0_CLK_S0D4_HSC		20
+#define R8A779F0_CLK_S0D6_HSC		21
+#define R8A779F0_CLK_S0D12_HSC		22
+#define R8A779F0_CLK_S0D2_CC		23
+#define R8A779F0_CLK_CL			24
+#define R8A779F0_CLK_CL16M		25
+#define R8A779F0_CLK_CL16M_MM		26
+#define R8A779F0_CLK_CL16M_RT		27
+#define R8A779F0_CLK_CL16M_PER		28
+#define R8A779F0_CLK_CL16M_HSC		29
+#define R8A779F0_CLK_Z0			30
+#define R8A779F0_CLK_Z1			31
+#define R8A779F0_CLK_ZB3		32
+#define R8A779F0_CLK_ZB3D2		33
+#define R8A779F0_CLK_ZB3D4		34
+#define R8A779F0_CLK_SD0H		35
+#define R8A779F0_CLK_SD0		36
+#define R8A779F0_CLK_RPC		37
+#define R8A779F0_CLK_RPCD2		38
+#define R8A779F0_CLK_MSO		39
+#define R8A779F0_CLK_SASYNCRT		40
+#define R8A779F0_CLK_SASYNCPERD1	41
+#define R8A779F0_CLK_SASYNCPERD2	42
+#define R8A779F0_CLK_SASYNCPERD4	43
+#define R8A779F0_CLK_DBGSOC_HSC		44
+#define R8A779F0_CLK_RSW2		45
+#define R8A779F0_CLK_OSC		46
+#define R8A779F0_CLK_ZR			47
+#define R8A779F0_CLK_CPEX		48
+#define R8A779F0_CLK_CBFUSA		49
+#define R8A779F0_CLK_R			50
+
+#endif /* __DT_BINDINGS_CLOCK_R8A779F0_CPG_MSSR_H__ */
-- 
cgit v1.2.3


From 13244cccc2b61ec715f0ac583d3037497004d4a5 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 1 Dec 2021 10:54:52 -0800
Subject: skbuff: introduce skb_pull_data

Like skb_pull but returns the original data pointer before pulling the
data after performing a check against sbk->len.

This allows to change code that does "struct foo *p = (void *)skb->data;"
which is hard to audit and error prone, to:

        p = skb_pull_data(skb, sizeof(*p));
        if (!p)
                return;

Which is both safer and cleaner.

Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/linux/skbuff.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index eba256af64a5..877dda38684a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2373,6 +2373,8 @@ static inline void *skb_pull_inline(struct sk_buff *skb, unsigned int len)
 	return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
 }
 
+void *skb_pull_data(struct sk_buff *skb, size_t len);
+
 void *__pskb_pull_tail(struct sk_buff *skb, int delta);
 
 static inline void *__pskb_pull(struct sk_buff *skb, unsigned int len)
-- 
cgit v1.2.3


From ae61a10d9d46c4a0844c46d0863bf991c4dda66c Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 1 Dec 2021 10:54:53 -0800
Subject: Bluetooth: HCI: Use skb_pull_data to parse BR/EDR events

This uses skb_pull_data to check the BR/EDR events received have the
minimum required length.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 0d2a9216869b..ba89b078ceb5 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -2012,6 +2012,10 @@ struct hci_cp_le_reject_cis {
 } __packed;
 
 /* ---- HCI Events ---- */
+struct hci_ev_status {
+	__u8    status;
+} __packed;
+
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
 #define HCI_EV_INQUIRY_RESULT		0x02
-- 
cgit v1.2.3


From aadc3d2f42a5bfcec597bfd0d997e3982f740846 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 1 Dec 2021 10:54:55 -0800
Subject: Bluetooth: HCI: Use skb_pull_data to parse Number of Complete Packets
 event

This uses skb_pull_data to check the Number of Complete Packets events
received have the minimum required length.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index ba89b078ceb5..3f57fd677b67 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -2139,7 +2139,7 @@ struct hci_comp_pkts_info {
 } __packed;
 
 struct hci_ev_num_comp_pkts {
-	__u8     num_hndl;
+	__u8     num;
 	struct hci_comp_pkts_info handles[];
 } __packed;
 
-- 
cgit v1.2.3


From 27d9eb4bcac16446ddbbea8860c11720b7afa891 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 1 Dec 2021 10:54:56 -0800
Subject: Bluetooth: HCI: Use skb_pull_data to parse Inquiry Result event

This uses skb_pull_data to check the Inquiry Result events received
have the minimum required length.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 3f57fd677b67..55466bfb972a 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -2028,6 +2028,11 @@ struct inquiry_info {
 	__le16   clock_offset;
 } __packed;
 
+struct hci_ev_inquiry_result {
+	__u8    num;
+	struct inquiry_info info[];
+};
+
 #define HCI_EV_CONN_COMPLETE		0x03
 struct hci_ev_conn_complete {
 	__u8     status;
-- 
cgit v1.2.3


From 8d08d324fdcb7729f22b236a3e20fa37a8a29e43 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 1 Dec 2021 10:54:57 -0800
Subject: Bluetooth: HCI: Use skb_pull_data to parse Inquiry Result with RSSI
 event

This uses skb_pull_data to check the Inquiry Result with RSSI events
received have the minimum required length.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 55466bfb972a..d25afd92a46e 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -2194,7 +2194,7 @@ struct hci_ev_pscan_rep_mode {
 } __packed;
 
 #define HCI_EV_INQUIRY_RESULT_WITH_RSSI	0x22
-struct inquiry_info_with_rssi {
+struct inquiry_info_rssi {
 	bdaddr_t bdaddr;
 	__u8     pscan_rep_mode;
 	__u8     pscan_period_mode;
@@ -2202,7 +2202,7 @@ struct inquiry_info_with_rssi {
 	__le16   clock_offset;
 	__s8     rssi;
 } __packed;
-struct inquiry_info_with_rssi_and_pscan_mode {
+struct inquiry_info_rssi_pscan {
 	bdaddr_t bdaddr;
 	__u8     pscan_rep_mode;
 	__u8     pscan_period_mode;
@@ -2211,6 +2211,14 @@ struct inquiry_info_with_rssi_and_pscan_mode {
 	__le16   clock_offset;
 	__s8     rssi;
 } __packed;
+struct hci_ev_inquiry_result_rssi {
+	__u8     num;
+	struct inquiry_info_rssi info[];
+} __packed;
+struct hci_ev_inquiry_result_rssi_pscan {
+	__u8     num;
+	struct inquiry_info_rssi_pscan info[];
+} __packed;
 
 #define HCI_EV_REMOTE_EXT_FEATURES	0x23
 struct hci_ev_remote_ext_features {
-- 
cgit v1.2.3


From 70a6b8de6af5c3e517bfd2ce8dccbd65c3f5bb4f Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 1 Dec 2021 10:54:58 -0800
Subject: Bluetooth: HCI: Use skb_pull_data to parse Extended Inquiry Result
 event

This uses skb_pull_data to check the Extended Inquiry Result events
received have the minimum required length.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index d25afd92a46e..9e721e6efef3 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -2273,6 +2273,11 @@ struct extended_inquiry_info {
 	__u8     data[240];
 } __packed;
 
+struct hci_ev_ext_inquiry_result {
+	__u8     num;
+	struct extended_inquiry_info info[];
+} __packed;
+
 #define HCI_EV_KEY_REFRESH_COMPLETE	0x30
 struct hci_ev_key_refresh_complete {
 	__u8	status;
-- 
cgit v1.2.3


From 47afe93c913a4cd0143667b59ba622086a2acfce Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 1 Dec 2021 10:55:00 -0800
Subject: Bluetooth: HCI: Use skb_pull_data to parse LE Advertising Report
 event

This uses skb_pull_data to check the LE Advertising Report events
received have the minimum required length.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 9e721e6efef3..c005b1ccdbc5 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -2445,13 +2445,18 @@ struct hci_ev_le_conn_complete {
 
 #define HCI_EV_LE_ADVERTISING_REPORT	0x02
 struct hci_ev_le_advertising_info {
-	__u8	 evt_type;
+	__u8	 type;
 	__u8	 bdaddr_type;
 	bdaddr_t bdaddr;
 	__u8	 length;
 	__u8	 data[];
 } __packed;
 
+struct hci_ev_le_advertising_report {
+	__u8    num;
+	struct hci_ev_le_advertising_info info[];
+} __packed;
+
 #define HCI_EV_LE_CONN_UPDATE_COMPLETE	0x03
 struct hci_ev_le_conn_update_complete {
 	__u8     status;
-- 
cgit v1.2.3


From b48b833f9e8aa0675820a3b5a0f30d7319590ea8 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 1 Dec 2021 10:55:01 -0800
Subject: Bluetooth: HCI: Use skb_pull_data to parse LE Ext Advertising Report
 event

This uses skb_pull_data to check the LE Extended Advertising Report
events received have the minimum required length.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index c005b1ccdbc5..d3f2da9b2ac2 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -2517,8 +2517,8 @@ struct hci_ev_le_phy_update_complete {
 } __packed;
 
 #define HCI_EV_LE_EXT_ADV_REPORT    0x0d
-struct hci_ev_le_ext_adv_report {
-	__le16 	 evt_type;
+struct hci_ev_le_ext_adv_info {
+	__le16   type;
 	__u8	 bdaddr_type;
 	bdaddr_t bdaddr;
 	__u8	 primary_phy;
@@ -2526,11 +2526,16 @@ struct hci_ev_le_ext_adv_report {
 	__u8	 sid;
 	__u8	 tx_power;
 	__s8	 rssi;
-	__le16 	 interval;
-	__u8  	 direct_addr_type;
+	__le16   interval;
+	__u8     direct_addr_type;
 	bdaddr_t direct_addr;
-	__u8  	 length;
-	__u8	 data[];
+	__u8     length;
+	__u8     data[];
+} __packed;
+
+struct hci_ev_le_ext_adv_report {
+	__u8     num;
+	struct hci_ev_le_ext_adv_info info[];
 } __packed;
 
 #define HCI_EV_LE_ENHANCED_CONN_COMPLETE    0x0a
-- 
cgit v1.2.3


From a3679649a19179813c3853b8bb397a801d9dd253 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 1 Dec 2021 10:55:02 -0800
Subject: Bluetooth: HCI: Use skb_pull_data to parse LE Direct Advertising
 Report event

This uses skb_pull_data to check the LE Direct Advertising Report
events received have the minimum required length.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index d3f2da9b2ac2..4343f79bd02c 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -2500,7 +2500,7 @@ struct hci_ev_le_data_len_change {
 
 #define HCI_EV_LE_DIRECT_ADV_REPORT	0x0B
 struct hci_ev_le_direct_adv_info {
-	__u8	 evt_type;
+	__u8	 type;
 	__u8	 bdaddr_type;
 	bdaddr_t bdaddr;
 	__u8	 direct_addr_type;
@@ -2508,6 +2508,11 @@ struct hci_ev_le_direct_adv_info {
 	__s8	 rssi;
 } __packed;
 
+struct hci_ev_le_direct_adv_report {
+	__u8	 num;
+	struct hci_ev_le_direct_adv_info info[];
+} __packed;
+
 #define HCI_EV_LE_PHY_UPDATE_COMPLETE	0x0c
 struct hci_ev_le_phy_update_complete {
 	__u8  status;
-- 
cgit v1.2.3


From d2f8114f9574509580a8506d2ef72e7e43d1a5bd Mon Sep 17 00:00:00 2001
From: Aditya Garg <gargaditya08@live.com>
Date: Thu, 2 Dec 2021 12:41:59 +0000
Subject: Bluetooth: add quirk disabling LE Read Transmit Power

Some devices have a bug causing them to not work if they query
LE tx power on startup. Thus we add a quirk in order to not query it
and default min/max tx power values to HCI_TX_POWER_INVALID.

Signed-off-by: Aditya Garg <gargaditya08@live.com>
Reported-by: Orlando Chamberlain <redecorating@protonmail.com>
Tested-by: Orlando Chamberlain <redecorating@protonmail.com>
Link:
https://lore.kernel.org/r/4970a940-211b-25d6-edab-21a815313954@protonmail.com
Fixes: 7c395ea521e6 ("Bluetooth: Query LE tx power on startup")
Cc: stable@vger.kernel.org
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 4343f79bd02c..c5f6b82b9d11 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -246,6 +246,15 @@ enum {
 	 * HCI after resume.
 	 */
 	HCI_QUIRK_NO_SUSPEND_NOTIFIER,
+
+	/*
+	 * When this quirk is set, LE tx power is not queried on startup
+	 * and the min/max tx power values default to HCI_TX_POWER_INVALID.
+	 *
+	 * This quirk can be set before hci_register_dev is called or
+	 * during the hdev->setup vendor callback.
+	 */
+	HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER,
 };
 
 /* HCI device flags */
-- 
cgit v1.2.3


From fe92ee6425a2c79ee84f0b9b8a14117200d15f7d Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 1 Dec 2021 11:49:50 -0800
Subject: Bluetooth: hci_core: Rework hci_conn_params flags

This reworks hci_conn_params flags to use bitmap_* helpers and add
support for setting the supported flags in hdev->conn_flags so it can
easily be accessed.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 7bae8376fd6f..d1b67755a373 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -152,22 +152,21 @@ struct bdaddr_list_with_irk {
 	u8 local_irk[16];
 };
 
-struct bdaddr_list_with_flags {
-	struct list_head list;
-	bdaddr_t bdaddr;
-	u8 bdaddr_type;
-	u32 current_flags;
-};
-
 enum hci_conn_flags {
 	HCI_CONN_FLAG_REMOTE_WAKEUP,
-	HCI_CONN_FLAG_MAX
-};
 
-#define hci_conn_test_flag(nr, flags) ((flags) & (1U << nr))
+	__HCI_CONN_NUM_FLAGS,
+};
 
 /* Make sure number of flags doesn't exceed sizeof(current_flags) */
-static_assert(HCI_CONN_FLAG_MAX < 32);
+static_assert(__HCI_CONN_NUM_FLAGS < 32);
+
+struct bdaddr_list_with_flags {
+	struct list_head list;
+	bdaddr_t bdaddr;
+	u8 bdaddr_type;
+	DECLARE_BITMAP(flags, __HCI_CONN_NUM_FLAGS);
+};
 
 struct bt_uuid {
 	struct list_head list;
@@ -560,6 +559,7 @@ struct hci_dev {
 	struct rfkill		*rfkill;
 
 	DECLARE_BITMAP(dev_flags, __HCI_NUM_FLAGS);
+	DECLARE_BITMAP(conn_flags, __HCI_CONN_NUM_FLAGS);
 
 	__s8			adv_tx_power;
 	__u8			adv_data[HCI_MAX_EXT_AD_LENGTH];
@@ -755,7 +755,7 @@ struct hci_conn_params {
 
 	struct hci_conn *conn;
 	bool explicit_connect;
-	u32 current_flags;
+	DECLARE_BITMAP(flags, __HCI_CONN_NUM_FLAGS);
 };
 
 extern struct list_head hci_dev_list;
-- 
cgit v1.2.3


From 6126ffabba6b415bd6bf3e1b091ef074e86765c6 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Fri, 3 Dec 2021 14:07:22 -0800
Subject: Bluetooth: Introduce HCI_CONN_FLAG_DEVICE_PRIVACY device flag

This introduces HCI_CONN_FLAG_DEVICE_PRIVACY which can be used by
userspace to indicate to the controller to use Device Privacy Mode to a
specific device.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index d1b67755a373..cf24af649c7f 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -154,6 +154,7 @@ struct bdaddr_list_with_irk {
 
 enum hci_conn_flags {
 	HCI_CONN_FLAG_REMOTE_WAKEUP,
+	HCI_CONN_FLAG_DEVICE_PRIVACY,
 
 	__HCI_CONN_NUM_FLAGS,
 };
@@ -1466,6 +1467,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 #define use_ll_privacy(dev) (ll_privacy_capable(dev) && \
 			     hci_dev_test_flag(dev, HCI_ENABLE_LL_PRIVACY))
 
+#define privacy_mode_capable(dev) (use_ll_privacy(dev) && \
+				   (hdev->commands[39] & 0x04))
+
 /* Use enhanced synchronous connection if command is supported */
 #define enhanced_sco_capable(dev) ((dev)->commands[29] & 0x08)
 
-- 
cgit v1.2.3


From 853b70b506a20cddf96419e8ac198df92e51bc4c Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Fri, 3 Dec 2021 14:07:23 -0800
Subject: Bluetooth: hci_sync: Set Privacy Mode when updating the resolving
 list

This adds support for Set Privacy Mode when updating the resolving list
when HCI_CONN_FLAG_DEVICE_PRIVACY so the controller shall use Device
Mode for devices programmed in the resolving list, Device Mode is
actually required when the remote device are not able to use RPA as
otherwise the default mode is Network Privacy Mode in which only
allows RPAs thus the controller would filter out advertisement using
identity addresses for which there is an IRK.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 10 ++++++++++
 include/net/bluetooth/hci_core.h |  1 +
 2 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index c5f6b82b9d11..7f5f00ff53da 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1940,6 +1940,16 @@ struct hci_rp_le_read_transmit_power {
 	__s8  max_le_tx_power;
 } __packed;
 
+#define HCI_NETWORK_PRIVACY		0x00
+#define HCI_DEVICE_PRIVACY		0x01
+
+#define HCI_OP_LE_SET_PRIVACY_MODE	0x204e
+struct hci_cp_le_set_privacy_mode {
+	__u8  bdaddr_type;
+	bdaddr_t  bdaddr;
+	__u8  mode;
+} __packed;
+
 #define HCI_OP_LE_READ_BUFFER_SIZE_V2	0x2060
 struct hci_rp_le_read_buffer_size_v2 {
 	__u8    status;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index cf24af649c7f..4d69dcfebd63 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -757,6 +757,7 @@ struct hci_conn_params {
 	struct hci_conn *conn;
 	bool explicit_connect;
 	DECLARE_BITMAP(flags, __HCI_CONN_NUM_FLAGS);
+	u8  privacy_mode;
 };
 
 extern struct list_head hci_dev_list;
-- 
cgit v1.2.3


From 8aca46f91c42020bc58cd56e464a1101e517aa10 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Fri, 3 Dec 2021 16:15:40 -0800
Subject: Bluetooth: mgmt: Introduce mgmt_alloc_skb and mgmt_send_event_skb

This introduces mgmt_alloc_skb and mgmt_send_event_skb which are
convenient when building MGMT events that have variable length as the
likes of skb_put_data can be used to insert portion directly on the skb
instead of having to first build an intermediate buffer just to be
copied over the skb.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 2f31e571f34c..77906832353f 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -390,6 +390,11 @@ struct hci_ctrl {
 	};
 };
 
+struct mgmt_ctrl {
+	struct hci_dev *hdev;
+	u16 opcode;
+};
+
 struct bt_skb_cb {
 	u8 pkt_type;
 	u8 force_active;
@@ -399,6 +404,7 @@ struct bt_skb_cb {
 		struct l2cap_ctrl l2cap;
 		struct sco_ctrl sco;
 		struct hci_ctrl hci;
+		struct mgmt_ctrl mgmt;
 	};
 };
 #define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb))
-- 
cgit v1.2.3


From 6fadaa565882cd7afc501de5921db6f5e45c784b Mon Sep 17 00:00:00 2001
From: Maxim Galaganov <max@internet.ru>
Date: Fri, 3 Dec 2021 14:35:39 -0800
Subject: tcp: expose __tcp_sock_set_cork and __tcp_sock_set_nodelay

Expose __tcp_sock_set_cork() and __tcp_sock_set_nodelay() for use in
MPTCP setsockopt code -- namely for syncing MPTCP socket options with
subflows inside sync_socket_options() while already holding the subflow
socket lock.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Maxim Galaganov <max@internet.ru>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/tcp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 48d8a363319e..78b91bb92f0d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -512,11 +512,13 @@ static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss)
 int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
 		  int shiftlen);
 
+void __tcp_sock_set_cork(struct sock *sk, bool on);
 void tcp_sock_set_cork(struct sock *sk, bool on);
 int tcp_sock_set_keepcnt(struct sock *sk, int val);
 int tcp_sock_set_keepidle_locked(struct sock *sk, int val);
 int tcp_sock_set_keepidle(struct sock *sk, int val);
 int tcp_sock_set_keepintvl(struct sock *sk, int val);
+void __tcp_sock_set_nodelay(struct sock *sk, bool on);
 void tcp_sock_set_nodelay(struct sock *sk);
 void tcp_sock_set_quickack(struct sock *sk, int val);
 int tcp_sock_set_syncnt(struct sock *sk, int val);
-- 
cgit v1.2.3


From 213d56bf33bdda835bac04046f09256a75c5ca8e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 19 Oct 2021 02:08:07 +0200
Subject: rcu/nocb: Prepare state machine for a new step

Currently SEGCBLIST_SOFTIRQ_ONLY is a bit of an exception among the
segcblist flags because it is an exclusive state that doesn't mix up
with the other flags. Remove it in favour of:

_ A flag specifying that rcu_core() needs to perform callbacks execution
  and acceleration

and

_ A flag specifying we want the nocb lock to be held in any needed
  circumstances

This clarifies the code and is more flexible: It allows to have a state
where rcu_core() runs with locking while offloading hasn't started yet.
This is a necessary step to prepare for triggering rcu_core() at the
very beginning of the de-offloading process so that rcu_core() won't
dismiss work while being preempted by the de-offloading process, at
least not without a pending subsequent rcu_core() that will quickly
catch up.

Reviewed-by: Valentin Schneider <Valentin.Schneider@arm.com>
Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Neeraj Upadhyay <neeraju@codeaurora.org>
Cc: Uladzislau Rezki <urezki@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcu_segcblist.h | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index 3db96c4f45fd..812961b1d064 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -69,7 +69,7 @@ struct rcu_cblist {
  *
  *
  *  ----------------------------------------------------------------------------
- *  |                         SEGCBLIST_SOFTIRQ_ONLY                           |
+ *  |                              SEGCBLIST_RCU_CORE                          |
  *  |                                                                          |
  *  |  Callbacks processed by rcu_core() from softirqs or local                |
  *  |  rcuc kthread, without holding nocb_lock.                                |
@@ -77,7 +77,7 @@ struct rcu_cblist {
  *                                         |
  *                                         v
  *  ----------------------------------------------------------------------------
- *  |                        SEGCBLIST_OFFLOADED                               |
+ *  |       SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED       |
  *  |                                                                          |
  *  | Callbacks processed by rcu_core() from softirqs or local                 |
  *  | rcuc kthread, while holding nocb_lock. Waking up CB and GP kthreads,     |
@@ -89,7 +89,9 @@ struct rcu_cblist {
  *                        |                                 |
  *                        v                                 v
  *  ---------------------------------------  ----------------------------------|
- *  |        SEGCBLIST_OFFLOADED |        |  |     SEGCBLIST_OFFLOADED |       |
+ *  |        SEGCBLIST_RCU_CORE   |       |  |     SEGCBLIST_RCU_CORE   |      |
+ *  |        SEGCBLIST_LOCKING    |       |  |     SEGCBLIST_LOCKING    |      |
+ *  |        SEGCBLIST_OFFLOADED  |       |  |     SEGCBLIST_OFFLOADED  |      |
  *  |        SEGCBLIST_KTHREAD_CB         |  |     SEGCBLIST_KTHREAD_GP        |
  *  |                                     |  |                                 |
  *  |                                     |  |                                 |
@@ -104,9 +106,10 @@ struct rcu_cblist {
  *                                         |
  *                                         v
  *  |--------------------------------------------------------------------------|
- *  |                           SEGCBLIST_OFFLOADED |                          |
- *  |                           SEGCBLIST_KTHREAD_CB |                         |
- *  |                           SEGCBLIST_KTHREAD_GP                           |
+ *  |                           SEGCBLIST_LOCKING    |                         |
+ *  |                           SEGCBLIST_OFFLOADED  |                         |
+ *  |                           SEGCBLIST_KTHREAD_GP |                         |
+ *  |                           SEGCBLIST_KTHREAD_CB                           |
  *  |                                                                          |
  *  |   Kthreads handle callbacks holding nocb_lock, local rcu_core() stops    |
  *  |   handling callbacks. Enable bypass queueing.                            |
@@ -120,7 +123,8 @@ struct rcu_cblist {
  *
  *
  *  |--------------------------------------------------------------------------|
- *  |                           SEGCBLIST_OFFLOADED |                          |
+ *  |                           SEGCBLIST_LOCKING    |                         |
+ *  |                           SEGCBLIST_OFFLOADED  |                         |
  *  |                           SEGCBLIST_KTHREAD_CB |                         |
  *  |                           SEGCBLIST_KTHREAD_GP                           |
  *  |                                                                          |
@@ -130,6 +134,8 @@ struct rcu_cblist {
  *                                      |
  *                                      v
  *  |--------------------------------------------------------------------------|
+ *  |                           SEGCBLIST_RCU_CORE   |                         |
+ *  |                           SEGCBLIST_LOCKING    |                         |
  *  |                           SEGCBLIST_KTHREAD_CB |                         |
  *  |                           SEGCBLIST_KTHREAD_GP                           |
  *  |                                                                          |
@@ -143,7 +149,9 @@ struct rcu_cblist {
  *                     |                                 |
  *                     v                                 v
  *  ---------------------------------------------------------------------------|
- *  |                                                                          |
+ *  |                                     |                                    |
+ *  |        SEGCBLIST_RCU_CORE |         |       SEGCBLIST_RCU_CORE |         |
+ *  |        SEGCBLIST_LOCKING  |         |       SEGCBLIST_LOCKING  |         |
  *  |        SEGCBLIST_KTHREAD_CB         |       SEGCBLIST_KTHREAD_GP         |
  *  |                                     |                                    |
  *  | GP kthread woke up and              |   CB kthread woke up and           |
@@ -159,7 +167,7 @@ struct rcu_cblist {
  *                                      |
  *                                      v
  *  ----------------------------------------------------------------------------
- *  |                                   0                                      |
+ *  |                SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING                    |
  *  |                                                                          |
  *  | Callbacks processed by rcu_core() from softirqs or local                 |
  *  | rcuc kthread, while holding nocb_lock. Forbid nocb_timer to be armed.    |
@@ -168,17 +176,18 @@ struct rcu_cblist {
  *                                      |
  *                                      v
  *  ----------------------------------------------------------------------------
- *  |                         SEGCBLIST_SOFTIRQ_ONLY                           |
+ *  |                         SEGCBLIST_RCU_CORE                               |
  *  |                                                                          |
  *  |  Callbacks processed by rcu_core() from softirqs or local                |
  *  |  rcuc kthread, without holding nocb_lock.                                |
  *  ----------------------------------------------------------------------------
  */
 #define SEGCBLIST_ENABLED	BIT(0)
-#define SEGCBLIST_SOFTIRQ_ONLY	BIT(1)
-#define SEGCBLIST_KTHREAD_CB	BIT(2)
-#define SEGCBLIST_KTHREAD_GP	BIT(3)
-#define SEGCBLIST_OFFLOADED	BIT(4)
+#define SEGCBLIST_RCU_CORE	BIT(1)
+#define SEGCBLIST_LOCKING	BIT(2)
+#define SEGCBLIST_KTHREAD_CB	BIT(3)
+#define SEGCBLIST_KTHREAD_GP	BIT(4)
+#define SEGCBLIST_OFFLOADED	BIT(5)
 
 struct rcu_segcblist {
 	struct rcu_head *head;
-- 
cgit v1.2.3


From fbb94cbd70d41c7511460896dfc7f9ea5da704b3 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 19 Oct 2021 02:08:08 +0200
Subject: rcu/nocb: Invoke rcu_core() at the start of deoffloading

On PREEMPT_RT, if rcu_core() is preempted by the de-offloading process,
some work, such as callbacks acceleration and invocation, may be left
unattended due to the volatile checks on the offloaded state.

In the worst case this work is postponed until the next rcu_pending()
check that can take a jiffy to reach, which can be a problem in case
of callbacks flooding.

Solve that with invoking rcu_core() early in the de-offloading process.
This way any work dismissed by an ongoing rcu_core() call fooled by
a preempting deoffloading process will be caught up by a nearby future
recall to rcu_core(), this time fully aware of the de-offloading state.

Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Neeraj Upadhyay <neeraju@codeaurora.org>
Cc: Uladzislau Rezki <urezki@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcu_segcblist.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index 812961b1d064..659d13a7ddaa 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -136,6 +136,20 @@ struct rcu_cblist {
  *  |--------------------------------------------------------------------------|
  *  |                           SEGCBLIST_RCU_CORE   |                         |
  *  |                           SEGCBLIST_LOCKING    |                         |
+ *  |                           SEGCBLIST_OFFLOADED  |                         |
+ *  |                           SEGCBLIST_KTHREAD_CB |                         |
+ *  |                           SEGCBLIST_KTHREAD_GP                           |
+ *  |                                                                          |
+ *  |   CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core()    |
+ *  |   handles callbacks concurrently. Bypass enqueue is enabled.             |
+ *  |   Invoke RCU core so we make sure not to preempt it in the middle with   |
+ *  |   leaving some urgent work unattended within a jiffy.                    |
+ *  ----------------------------------------------------------------------------
+ *                                      |
+ *                                      v
+ *  |--------------------------------------------------------------------------|
+ *  |                           SEGCBLIST_RCU_CORE   |                         |
+ *  |                           SEGCBLIST_LOCKING    |                         |
  *  |                           SEGCBLIST_KTHREAD_CB |                         |
  *  |                           SEGCBLIST_KTHREAD_GP                           |
  *  |                                                                          |
-- 
cgit v1.2.3


From 802a7dc5cf1bef06f7b290ce76d478138408d6b1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 7 Dec 2021 10:03:23 -0800
Subject: netfilter: conntrack: annotate data-races around ct->timeout

(struct nf_conn)->timeout can be read/written locklessly,
add READ_ONCE()/WRITE_ONCE() to prevent load/store tearing.

BUG: KCSAN: data-race in __nf_conntrack_alloc / __nf_conntrack_find_get

write to 0xffff888132e78c08 of 4 bytes by task 6029 on cpu 0:
 __nf_conntrack_alloc+0x158/0x280 net/netfilter/nf_conntrack_core.c:1563
 init_conntrack+0x1da/0xb30 net/netfilter/nf_conntrack_core.c:1635
 resolve_normal_ct+0x502/0x610 net/netfilter/nf_conntrack_core.c:1746
 nf_conntrack_in+0x1c5/0x88f net/netfilter/nf_conntrack_core.c:1901
 ipv6_conntrack_local+0x19/0x20 net/netfilter/nf_conntrack_proto.c:414
 nf_hook_entry_hookfn include/linux/netfilter.h:142 [inline]
 nf_hook_slow+0x72/0x170 net/netfilter/core.c:619
 nf_hook include/linux/netfilter.h:262 [inline]
 NF_HOOK include/linux/netfilter.h:305 [inline]
 ip6_xmit+0xa3a/0xa60 net/ipv6/ip6_output.c:324
 inet6_csk_xmit+0x1a2/0x1e0 net/ipv6/inet6_connection_sock.c:135
 __tcp_transmit_skb+0x132a/0x1840 net/ipv4/tcp_output.c:1402
 tcp_transmit_skb net/ipv4/tcp_output.c:1420 [inline]
 tcp_write_xmit+0x1450/0x4460 net/ipv4/tcp_output.c:2680
 __tcp_push_pending_frames+0x68/0x1c0 net/ipv4/tcp_output.c:2864
 tcp_push_pending_frames include/net/tcp.h:1897 [inline]
 tcp_data_snd_check+0x62/0x2e0 net/ipv4/tcp_input.c:5452
 tcp_rcv_established+0x880/0x10e0 net/ipv4/tcp_input.c:5947
 tcp_v6_do_rcv+0x36e/0xa50 net/ipv6/tcp_ipv6.c:1521
 sk_backlog_rcv include/net/sock.h:1030 [inline]
 __release_sock+0xf2/0x270 net/core/sock.c:2768
 release_sock+0x40/0x110 net/core/sock.c:3300
 sk_stream_wait_memory+0x435/0x700 net/core/stream.c:145
 tcp_sendmsg_locked+0xb85/0x25a0 net/ipv4/tcp.c:1402
 tcp_sendmsg+0x2c/0x40 net/ipv4/tcp.c:1440
 inet6_sendmsg+0x5f/0x80 net/ipv6/af_inet6.c:644
 sock_sendmsg_nosec net/socket.c:704 [inline]
 sock_sendmsg net/socket.c:724 [inline]
 __sys_sendto+0x21e/0x2c0 net/socket.c:2036
 __do_sys_sendto net/socket.c:2048 [inline]
 __se_sys_sendto net/socket.c:2044 [inline]
 __x64_sys_sendto+0x74/0x90 net/socket.c:2044
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffff888132e78c08 of 4 bytes by task 17446 on cpu 1:
 nf_ct_is_expired include/net/netfilter/nf_conntrack.h:286 [inline]
 ____nf_conntrack_find net/netfilter/nf_conntrack_core.c:776 [inline]
 __nf_conntrack_find_get+0x1c7/0xac0 net/netfilter/nf_conntrack_core.c:807
 resolve_normal_ct+0x273/0x610 net/netfilter/nf_conntrack_core.c:1734
 nf_conntrack_in+0x1c5/0x88f net/netfilter/nf_conntrack_core.c:1901
 ipv6_conntrack_local+0x19/0x20 net/netfilter/nf_conntrack_proto.c:414
 nf_hook_entry_hookfn include/linux/netfilter.h:142 [inline]
 nf_hook_slow+0x72/0x170 net/netfilter/core.c:619
 nf_hook include/linux/netfilter.h:262 [inline]
 NF_HOOK include/linux/netfilter.h:305 [inline]
 ip6_xmit+0xa3a/0xa60 net/ipv6/ip6_output.c:324
 inet6_csk_xmit+0x1a2/0x1e0 net/ipv6/inet6_connection_sock.c:135
 __tcp_transmit_skb+0x132a/0x1840 net/ipv4/tcp_output.c:1402
 __tcp_send_ack+0x1fd/0x300 net/ipv4/tcp_output.c:3956
 tcp_send_ack+0x23/0x30 net/ipv4/tcp_output.c:3962
 __tcp_ack_snd_check+0x2d8/0x510 net/ipv4/tcp_input.c:5478
 tcp_ack_snd_check net/ipv4/tcp_input.c:5523 [inline]
 tcp_rcv_established+0x8c2/0x10e0 net/ipv4/tcp_input.c:5948
 tcp_v6_do_rcv+0x36e/0xa50 net/ipv6/tcp_ipv6.c:1521
 sk_backlog_rcv include/net/sock.h:1030 [inline]
 __release_sock+0xf2/0x270 net/core/sock.c:2768
 release_sock+0x40/0x110 net/core/sock.c:3300
 tcp_sendpage+0x94/0xb0 net/ipv4/tcp.c:1114
 inet_sendpage+0x7f/0xc0 net/ipv4/af_inet.c:833
 rds_tcp_xmit+0x376/0x5f0 net/rds/tcp_send.c:118
 rds_send_xmit+0xbed/0x1500 net/rds/send.c:367
 rds_send_worker+0x43/0x200 net/rds/threads.c:200
 process_one_work+0x3fc/0x980 kernel/workqueue.c:2298
 worker_thread+0x616/0xa70 kernel/workqueue.c:2445
 kthread+0x2c7/0x2e0 kernel/kthread.c:327
 ret_from_fork+0x1f/0x30

value changed: 0x00027cc2 -> 0x00000000

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 17446 Comm: kworker/u4:5 Tainted: G        W         5.16.0-rc4-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: krdsd rds_send_worker

Note: I chose an arbitrary commit for the Fixes: tag,
because I do not think we need to backport this fix to very old kernels.

Fixes: e37542ba111f ("netfilter: conntrack: avoid possible false sharing")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index cc663c68ddc4..d24b0a34c8f0 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -276,14 +276,14 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
 /* jiffies until ct expires, 0 if already expired */
 static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
 {
-	s32 timeout = ct->timeout - nfct_time_stamp;
+	s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
 
 	return timeout > 0 ? timeout : 0;
 }
 
 static inline bool nf_ct_is_expired(const struct nf_conn *ct)
 {
-	return (__s32)(ct->timeout - nfct_time_stamp) <= 0;
+	return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0;
 }
 
 /* use after obtaining a reference count */
@@ -302,7 +302,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
 static inline void nf_ct_offload_timeout(struct nf_conn *ct)
 {
 	if (nf_ct_expires(ct) < NF_CT_DAY / 2)
-		ct->timeout = nfct_time_stamp + NF_CT_DAY;
+		WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
 }
 
 struct kernel_param;
-- 
cgit v1.2.3


From 81faa4f6fba429334ff72bb5ba7696818509b5b5 Mon Sep 17 00:00:00 2001
From: Li Zhijian <zhijianx.li@intel.com>
Date: Wed, 3 Nov 2021 16:30:28 +0800
Subject: locktorture,rcutorture,torture: Always log error message

Unconditionally log messages corresponding to errors.

Acked-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Li Zhijian <zhijianx.li@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/torture.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/torture.h b/include/linux/torture.h
index 24f58e50a94b..63fa4196e51c 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -38,13 +38,8 @@ do {										\
 		pr_alert("%s" TORTURE_FLAG " %s\n", torture_type, s);		\
 	}									\
 } while (0)
-#define VERBOSE_TOROUT_ERRSTRING(s) \
-do {										\
-	if (verbose) {								\
-		verbose_torout_sleep();						\
-		pr_alert("%s" TORTURE_FLAG "!!! %s\n", torture_type, s);	\
-	}									\
-} while (0)
+#define TOROUT_ERRSTRING(s) \
+	pr_alert("%s" TORTURE_FLAG "!!! %s\n", torture_type, s)
 void verbose_torout_sleep(void);
 
 #define torture_init_error(firsterr)						\
-- 
cgit v1.2.3


From 08f0b22d731fa86957749c649d6ef6ebc07e8ad2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 6 Dec 2021 17:30:27 -0800
Subject: net: eql: add net device refcount tracker

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/if_eql.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/if_eql.h b/include/linux/if_eql.h
index d984694c384d..d75601d613cc 100644
--- a/include/linux/if_eql.h
+++ b/include/linux/if_eql.h
@@ -26,6 +26,7 @@
 typedef struct slave {
 	struct list_head	list;
 	struct net_device	*dev;
+	netdevice_tracker	dev_tracker;
 	long			priority;
 	long			priority_bps;
 	long			priority_Bps;
-- 
cgit v1.2.3


From 19c9ebf6ed70856385296a65e78c1699081b152f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 6 Dec 2021 17:30:28 -0800
Subject: vlan: add net device refcount tracker

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/if_vlan.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 41a518336673..8420fe504927 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -162,6 +162,7 @@ struct netpoll;
  *	@vlan_id: VLAN identifier
  *	@flags: device flags
  *	@real_dev: underlying netdevice
+ *	@dev_tracker: refcount tracker for @real_dev reference
  *	@real_dev_addr: address of underlying netdevice
  *	@dent: proc dir entry
  *	@vlan_pcpu_stats: ptr to percpu rx stats
@@ -177,6 +178,8 @@ struct vlan_dev_priv {
 	u16					flags;
 
 	struct net_device			*real_dev;
+	netdevice_tracker			dev_tracker;
+
 	unsigned char				real_dev_addr[ETH_ALEN];
 
 	struct proc_dir_entry			*dent;
-- 
cgit v1.2.3


From f12bf6f3f942b37de65eeea8be25903587fec930 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 6 Dec 2021 17:30:30 -0800
Subject: net: watchdog: add net device refcount tracker

Add a netdevice_tracker inside struct net_device, to track
the self reference when a device has an active watchdog timer.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 69dca1edd5a6..1a748ee9a421 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1951,6 +1951,7 @@ enum netdev_ml_priv_type {
  *
  *	@dev_addr_shadow:	Copy of @dev_addr to catch direct writes.
  *	@linkwatch_dev_tracker:	refcount tracker used by linkwatch.
+ *	@watchdog_dev_tracker:	refcount tracker used by watchdog.
  *
  *	FIXME: cleanup struct net_device such that network protocol info
  *	moves out.
@@ -2282,6 +2283,7 @@ struct net_device {
 
 	u8 dev_addr_shadow[MAX_ADDR_LEN];
 	netdevice_tracker	linkwatch_dev_tracker;
+	netdevice_tracker	watchdog_dev_tracker;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
-- 
cgit v1.2.3


From e44b14ebae1025cff3bef2d78a2e2f6869cefca0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 6 Dec 2021 17:30:32 -0800
Subject: inet: add net device refcount tracker to struct fib_nh_common

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip_fib.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 3417ba2d27ad..c4297704bbcb 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -79,6 +79,7 @@ struct fnhe_hash_bucket {
 
 struct fib_nh_common {
 	struct net_device	*nhc_dev;
+	netdevice_tracker	nhc_dev_tracker;
 	int			nhc_oif;
 	unsigned char		nhc_scope;
 	u8			nhc_family;
@@ -111,6 +112,7 @@ struct fib_nh {
 	int			nh_saddr_genid;
 #define fib_nh_family		nh_common.nhc_family
 #define fib_nh_dev		nh_common.nhc_dev
+#define fib_nh_dev_tracker	nh_common.nhc_dev_tracker
 #define fib_nh_oif		nh_common.nhc_oif
 #define fib_nh_flags		nh_common.nhc_flags
 #define fib_nh_lws		nh_common.nhc_lwtstate
-- 
cgit v1.2.3


From 66ce07f7802b68616a008d390f2e6783d68fb79f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 6 Dec 2021 17:30:33 -0800
Subject: ax25: add net device refcount tracker

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ax25.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/ax25.h b/include/net/ax25.h
index 03d409de61ad..526e49589197 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -229,7 +229,10 @@ struct ctl_table;
 
 typedef struct ax25_dev {
 	struct ax25_dev		*next;
+
 	struct net_device	*dev;
+	netdevice_tracker	dev_tracker;
+
 	struct net_device	*forward;
 	struct ctl_table_header *sysheader;
 	int			values[AX25_MAX_VALUES];
-- 
cgit v1.2.3


From 615d069dcf1207462ce30c05a2f47d461be8f6c8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 6 Dec 2021 17:30:34 -0800
Subject: llc: add net device refcount tracker

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/llc_conn.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h
index ea985aa7a6c5..2c1ea3414640 100644
--- a/include/net/llc_conn.h
+++ b/include/net/llc_conn.h
@@ -38,6 +38,7 @@ struct llc_sock {
 	struct llc_addr	    laddr;		/* lsap/mac pair */
 	struct llc_addr	    daddr;		/* dsap/mac pair */
 	struct net_device   *dev;		/* device to send to remote */
+	netdevice_tracker   dev_tracker;
 	u32		    copied_seq;		/* head of yet unread data */
 	u8		    retry_count;	/* number of retries */
 	u8		    ack_must_be_send;
-- 
cgit v1.2.3


From ada066b2e02cad7934e86e51985078d707c64250 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 6 Dec 2021 17:30:39 -0800
Subject: net: sched: act_mirred: add net device refcount tracker

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/tc_act/tc_mirred.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index 1cace4c69e44..32ce8ea36950 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -10,6 +10,7 @@ struct tcf_mirred {
 	int			tcfm_eaction;
 	bool			tcfm_mac_header_xmit;
 	struct net_device __rcu	*tcfm_dev;
+	netdevice_tracker	tcfm_dev_tracker;
 	struct list_head	tcfm_list;
 };
 #define to_mirred(a) ((struct tcf_mirred *)a)
-- 
cgit v1.2.3


From a97770cc4016c2733bcef9dbe3d5b1ad02d13356 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Mon, 6 Dec 2021 16:12:27 +0800
Subject: net: phy: Remove unnecessary indentation in the comments of
 phy_device

Fix warning as:

linux-next/Documentation/networking/kapi:122: ./include/linux/phy.h:543: WARNING: Unexpected indentation.
linux-next/Documentation/networking/kapi:122: ./include/linux/phy.h:544: WARNING: Block quote ends without a blank line; unexpected unindent.
linux-next/Documentation/networking/kapi:122: ./include/linux/phy.h:546: WARNING: Unexpected indentation.

Suggested-by: Akira Yokosawa <akiyks@gmail.com>
Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 96e43fbb2dd8..cbf03a5f9cf5 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -538,11 +538,12 @@ struct macsec_ops;
  * @mac_managed_pm: Set true if MAC driver takes of suspending/resuming PHY
  * @state: State of the PHY for management purposes
  * @dev_flags: Device-specific flags used by the PHY driver.
- *		Bits [15:0] are free to use by the PHY driver to communicate
- *			    driver specific behavior.
- *		Bits [23:16] are currently reserved for future use.
- *		Bits [31:24] are reserved for defining generic
- *			     PHY driver behavior.
+ *
+ *      - Bits [15:0] are free to use by the PHY driver to communicate
+ *        driver specific behavior.
+ *      - Bits [23:16] are currently reserved for future use.
+ *      - Bits [31:24] are reserved for defining generic
+ *        PHY driver behavior.
  * @irq: IRQ number of the PHY's interrupt (-1 if none)
  * @phy_timer: The timer for handling the state machine
  * @phylink: Pointer to phylink instance for this PHY
-- 
cgit v1.2.3


From 32ecd22ba60bbb724c6631d763ce77e5139bd341 Mon Sep 17 00:00:00 2001
From: Colin Foster <colin.foster@in-advantage.com>
Date: Tue, 7 Dec 2021 09:00:30 -0800
Subject: net: mscc: ocelot: split register definitions to a separate file

Move these to a separate file will allow them to be shared to other
drivers.

Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/soc/mscc/vsc7514_regs.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 include/soc/mscc/vsc7514_regs.h

(limited to 'include')

diff --git a/include/soc/mscc/vsc7514_regs.h b/include/soc/mscc/vsc7514_regs.h
new file mode 100644
index 000000000000..98743e252012
--- /dev/null
+++ b/include/soc/mscc/vsc7514_regs.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2021 Innovative Advantage Inc.
+ */
+
+#ifndef VSC7514_REGS_H
+#define VSC7514_REGS_H
+
+extern const u32 vsc7514_ana_regmap[];
+extern const u32 vsc7514_qs_regmap[];
+extern const u32 vsc7514_qsys_regmap[];
+extern const u32 vsc7514_rew_regmap[];
+extern const u32 vsc7514_sys_regmap[];
+extern const u32 vsc7514_vcap_regmap[];
+extern const u32 vsc7514_ptp_regmap[];
+extern const u32 vsc7514_dev_gmii_regmap[];
+
+extern const struct vcap_field vsc7514_vcap_es0_keys[];
+extern const struct vcap_field vsc7514_vcap_es0_actions[];
+extern const struct vcap_field vsc7514_vcap_is1_keys[];
+extern const struct vcap_field vsc7514_vcap_is1_actions[];
+extern const struct vcap_field vsc7514_vcap_is2_keys[];
+extern const struct vcap_field vsc7514_vcap_is2_actions[];
+
+#endif
-- 
cgit v1.2.3


From 330c6d3bfa268794bf692165d0f781f1c2d4d83e Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Wed, 24 Nov 2021 10:45:36 +0900
Subject: can: bittiming: replace CAN units with the generic ones from
 linux/units.h

In [1], we introduced a set of units in linux/can/bittiming.h. Since
then, generic SI prefixes were added to linux/units.h in [2]. Those
new prefixes can perfectly replace CAN specific ones.

This patch replaces all occurrences of the CAN units with their
corresponding prefix (from linux/units) and the unit (as a comment)
according to below table.

 CAN units	SI metric prefix (from linux/units) + unit (as a comment)
 ------------------------------------------------------------------------
 CAN_KBPS	KILO /* BPS */
 CAN_MBPS	MEGA /* BPS */
 CAM_MHZ	MEGA /* Hz */

The definition are then removed from linux/can/bittiming.h

[1] commit 1d7750760b70 ("can: bittiming: add CAN_KBPS, CAN_MBPS and
CAN_MHZ macros")

[2] commit 26471d4a6cf8 ("units: Add SI metric prefix definitions")

Link: https://lore.kernel.org/all/20211124014536.782550-1-mailhol.vincent@wanadoo.fr
Suggested-by: Jimmy Assarsson <extja@kvaser.com>
Suggested-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/bittiming.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h
index 20b50baf3a02..a81652d1c6f3 100644
--- a/include/linux/can/bittiming.h
+++ b/include/linux/can/bittiming.h
@@ -12,13 +12,6 @@
 #define CAN_SYNC_SEG 1
 
 
-/* Kilobits and Megabits per second */
-#define CAN_KBPS 1000UL
-#define CAN_MBPS 1000000UL
-
-/* Megahertz */
-#define CAN_MHZ 1000000UL
-
 #define CAN_CTRLMODE_TDC_MASK					\
 	(CAN_CTRLMODE_TDC_AUTO | CAN_CTRLMODE_TDC_MANUAL)
 
-- 
cgit v1.2.3


From 27592ae8dbe41033261b6fdf27d78998aabd2665 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 16 Nov 2021 16:03:57 +0000
Subject: KVM: Move wiping of the kvm->vcpus array to common code

All architectures have similar loops iterating over the vcpus,
freeing one vcpu at a time, and eventually wiping the reference
off the vcpus array. They are also inconsistently taking
the kvm->lock mutex when wiping the references from the array.

Make this code common, which will simplify further changes.
The locking is dropped altogether, as this should only be called
when there is no further references on the kvm structure.

Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Message-Id: <20211116160403.4074052-2-maz@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c310648cc8f1..e2f9f8f67c58 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -733,7 +733,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
 		if (WARN_ON_ONCE(!memslot->npages)) {			\
 		} else
 
-void kvm_vcpu_destroy(struct kvm_vcpu *vcpu);
+void kvm_destroy_vcpus(struct kvm *kvm);
 
 void vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
-- 
cgit v1.2.3


From c5b077549136584618a66258f09d8d4b41e7409c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 16 Nov 2021 16:04:01 +0000
Subject: KVM: Convert the kvm->vcpus array to a xarray

At least on arm64 and x86, the vcpus array is pretty huge (up to
1024 entries on x86) and is mostly empty in the majority of the cases
(running 1k vcpu VMs is not that common).

This mean that we end-up with a 4kB block of unused memory in the
middle of the kvm structure.

Instead of wasting away this memory, let's use an xarray instead,
which gives us almost the same flexibility as a normal array, but
with a reduced memory usage with smaller VMs.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Message-Id: <20211116160403.4074052-6-maz@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e2f9f8f67c58..2201dc07126a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -29,6 +29,7 @@
 #include <linux/refcount.h>
 #include <linux/nospec.h>
 #include <linux/notifier.h>
+#include <linux/xarray.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -552,7 +553,7 @@ struct kvm {
 	struct mutex slots_arch_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
-	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+	struct xarray vcpu_array;
 
 	/* Used to wait for completion of MMU notifiers.  */
 	spinlock_t mn_invalidate_lock;
@@ -701,7 +702,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 
 	/* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu.  */
 	smp_rmb();
-	return kvm->vcpus[i];
+	return xa_load(&kvm->vcpu_array, i);
 }
 
 #define kvm_for_each_vcpu(idx, vcpup, kvm) \
-- 
cgit v1.2.3


From 46808a4cb89708c2e5b264eb9d1035762581921b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 16 Nov 2021 16:04:02 +0000
Subject: KVM: Use 'unsigned long' as kvm_for_each_vcpu()'s index

Everywhere we use kvm_for_each_vpcu(), we use an int as the vcpu
index. Unfortunately, we're about to move rework the iterator,
which requires this to be upgrade to an unsigned long.

Let's bite the bullet and repaint all of it in one go.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Message-Id: <20211116160403.4074052-7-maz@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2201dc07126a..7da6086262c6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -714,7 +714,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
 {
 	struct kvm_vcpu *vcpu = NULL;
-	int i;
+	unsigned long i;
 
 	if (id < 0)
 		return NULL;
-- 
cgit v1.2.3


From 214bd3a6f46981b7867946e1b4f628a06bcf2091 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 16 Nov 2021 16:04:03 +0000
Subject: KVM: Convert kvm_for_each_vcpu() to using xa_for_each_range()

Now that the vcpu array is backed by an xarray, use the optimised
iterator that matches the underlying data structure.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Message-Id: <20211116160403.4074052-8-maz@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7da6086262c6..66548287ed42 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -705,11 +705,9 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 	return xa_load(&kvm->vcpu_array, i);
 }
 
-#define kvm_for_each_vcpu(idx, vcpup, kvm) \
-	for (idx = 0; \
-	     idx < atomic_read(&kvm->online_vcpus) && \
-	     (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
-	     idx++)
+#define kvm_for_each_vcpu(idx, vcpup, kvm)		   \
+	xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \
+			  (atomic_read(&kvm->online_vcpus) - 1))
 
 static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
 {
-- 
cgit v1.2.3


From afa319a54a8c760ba59683cd3c4318635049a664 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 6 Dec 2021 20:54:07 +0100
Subject: KVM: Require total number of memslot pages to fit in an unsigned long

Explicitly disallow creating more memslot pages than can fit in an
unsigned long, KVM doesn't correctly handle a total number of memslot
pages that doesn't fit in an unsigned long and remedying that would be a
waste of time.

For a 64-bit kernel, this is a nop as memslots are not allowed to overlap
in the gfn address space.

With a 32-bit kernel, userspace can at most address 3gb of virtual memory,
whereas wrapping the total number of pages would require 4tb+ of guest
physical memory.  Even with x86's second address space for SMM, userspace
would need to alias all of guest memory more than one _thousand_ times.
And on older x86 hardware with MAXPHYADDR < 43, the guest couldn't
actually access any of those aliases even if userspace lied about
guest.MAXPHYADDR.

On 390 and arm64, this is a nop as they don't support 32-bit hosts.

On x86, practically speaking this is simply acknowledging reality as the
existing kvm_mmu_calculate_default_mmu_pages() assumes the total number
of pages fits in an "unsigned long".

On PPC, this is likely a nop as every flavor of PPC KVM assumes gfns (and
gpas!) fit in unsigned long.  arch/powerpc/kvm/book3s_32_mmu_host.c goes
a step further and fails the build if CONFIG_PTE_64BIT=y, which
presumably means that it does't support 64-bit physical addresses.

On MIPS, this is also likely a nop as the core MMU helpers assume gpas
fit in unsigned long, e.g. see kvm_mips_##name##_pte.

And finally, RISC-V is a "don't care" as it doesn't exist in any release,
i.e. there is no established ABI to break.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <1c2c91baf8e78acccd4dad38da591002e61c013c.1638817638.git.maciej.szmigiero@oracle.com>
---
 include/linux/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 66548287ed42..e38705359af5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -552,6 +552,7 @@ struct kvm {
 	 */
 	struct mutex slots_arch_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
+	unsigned long nr_memslot_pages;
 	struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
 	struct xarray vcpu_array;
 
-- 
cgit v1.2.3


From 537a17b3149300987456e8949ccb991e604047d6 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 6 Dec 2021 20:54:11 +0100
Subject: KVM: Let/force architectures to deal with arch specific memslot data

Pass the "old" slot to kvm_arch_prepare_memory_region() and force arch
code to handle propagating arch specific data from "new" to "old" when
necessary.  This is a baby step towards dynamically allocating "new" from
the get go, and is a (very) minor performance boost on x86 due to not
unnecessarily copying arch data.

For PPC HV, copy the rmap in the !CREATE and !DELETE paths, i.e. for MOVE
and FLAGS_ONLY.  This is functionally a nop as the previous behavior
would overwrite the pointer for CREATE, and eventually discard/ignore it
for DELETE.

For x86, copy the arch data only for FLAGS_ONLY changes.  Unlike PPC HV,
x86 needs to reallocate arch data in the MOVE case as the size of x86's
allocations depend on the alignment of the memslot's gfn.

Opportunistically tweak kvm_arch_prepare_memory_region()'s param order to
match the "commit" prototype.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
[mss: add missing RISCV kvm_arch_prepare_memory_region() change]
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <67dea5f11bbcfd71e3da5986f11e87f5dd4013f9.1638817639.git.maciej.szmigiero@oracle.com>
---
 include/linux/kvm_host.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e38705359af5..cb7311dc6f32 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -833,8 +833,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot);
 void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
-				struct kvm_memory_slot *memslot,
 				const struct kvm_userspace_memory_region *mem,
+				const struct kvm_memory_slot *old,
+				struct kvm_memory_slot *new,
 				enum kvm_mr_change change);
 void kvm_arch_commit_memory_region(struct kvm *kvm,
 				const struct kvm_userspace_memory_region *mem,
-- 
cgit v1.2.3


From 6a99c6e3f52a6f0d4c6ebcfa7359c718a19ffbe6 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 6 Dec 2021 20:54:18 +0100
Subject: KVM: Stop passing kvm_userspace_memory_region to arch memslot hooks

Drop the @mem param from kvm_arch_{prepare,commit}_memory_region() now
that its use has been removed in all architectures.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <aa5ed3e62c27e881d0d8bc0acbc1572bc336dc19.1638817640.git.maciej.szmigiero@oracle.com>
---
 include/linux/kvm_host.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index cb7311dc6f32..da0d4f21a150 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -833,12 +833,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot);
 void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
-				const struct kvm_userspace_memory_region *mem,
 				const struct kvm_memory_slot *old,
 				struct kvm_memory_slot *new,
 				enum kvm_mr_change change);
 void kvm_arch_commit_memory_region(struct kvm *kvm,
-				const struct kvm_userspace_memory_region *mem,
 				struct kvm_memory_slot *old,
 				const struct kvm_memory_slot *new,
 				enum kvm_mr_change change);
-- 
cgit v1.2.3


From c928bfc2632fa3dd6a3bd4504ac6d8e42302287a Mon Sep 17 00:00:00 2001
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
Date: Mon, 6 Dec 2021 20:54:25 +0100
Subject: KVM: Integrate gfn_to_memslot_approx() into search_memslots()

s390 arch has gfn_to_memslot_approx() which is almost identical to
search_memslots(), differing only in that in case the gfn falls in a hole
one of the memslots bordering the hole is returned.

Add this lookup mode as an option to search_memslots() so we don't have two
almost identical functions for looking up a memslot by its gfn.

Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
[sean: tweaked helper names to keep gfn_to_memslot_approx() in s390]
Reviewed-by: Sean Christopherson <seanjc@google.com>
Message-Id: <171cd89b52c718dbe180ecd909b4437a64a7e2ec.1638817640.git.maciej.szmigiero@oracle.com>
---
 include/linux/kvm_host.h | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index da0d4f21a150..2f80ce84fbcf 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1233,10 +1233,14 @@ try_get_memslot(struct kvm_memslots *slots, int slot_index, gfn_t gfn)
  * Returns a pointer to the memslot that contains gfn and records the index of
  * the slot in index. Otherwise returns NULL.
  *
+ * With "approx" set returns the memslot also when the address falls
+ * in a hole. In that case one of the memslots bordering the hole is
+ * returned.
+ *
  * IMPORTANT: Slots are sorted from highest GFN to lowest GFN!
  */
 static inline struct kvm_memory_slot *
-search_memslots(struct kvm_memslots *slots, gfn_t gfn, int *index)
+search_memslots(struct kvm_memslots *slots, gfn_t gfn, int *index, bool approx)
 {
 	int start = 0, end = slots->used_slots;
 	struct kvm_memory_slot *memslots = slots->memslots;
@@ -1254,22 +1258,26 @@ search_memslots(struct kvm_memslots *slots, gfn_t gfn, int *index)
 			start = slot + 1;
 	}
 
+	if (approx && start >= slots->used_slots) {
+		*index = slots->used_slots - 1;
+		return &memslots[slots->used_slots - 1];
+	}
+
 	slot = try_get_memslot(slots, start, gfn);
 	if (slot) {
 		*index = start;
 		return slot;
 	}
+	if (approx) {
+		*index = start;
+		return &memslots[start];
+	}
 
 	return NULL;
 }
 
-/*
- * __gfn_to_memslot() and its descendants are here because it is called from
- * non-modular code in arch/powerpc/kvm/book3s_64_vio{,_hv}.c. gfn_to_memslot()
- * itself isn't here as an inline because that would bloat other code too much.
- */
 static inline struct kvm_memory_slot *
-__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
+____gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn, bool approx)
 {
 	struct kvm_memory_slot *slot;
 	int slot_index = atomic_read(&slots->last_used_slot);
@@ -1278,7 +1286,7 @@ __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
 	if (slot)
 		return slot;
 
-	slot = search_memslots(slots, gfn, &slot_index);
+	slot = search_memslots(slots, gfn, &slot_index, approx);
 	if (slot) {
 		atomic_set(&slots->last_used_slot, slot_index);
 		return slot;
@@ -1287,6 +1295,17 @@ __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
 	return NULL;
 }
 
+/*
+ * __gfn_to_memslot() and its descendants are here to allow arch code to inline
+ * the lookups in hot paths.  gfn_to_memslot() itself isn't here as an inline
+ * because that would bloat other code too much.
+ */
+static inline struct kvm_memory_slot *
+__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
+{
+	return ____gfn_to_memslot(slots, gfn, false);
+}
+
 static inline unsigned long
 __gfn_to_hva_memslot(const struct kvm_memory_slot *slot, gfn_t gfn)
 {
-- 
cgit v1.2.3


From 26b8345abc75a7404716864710930407b7d873f9 Mon Sep 17 00:00:00 2001
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
Date: Mon, 6 Dec 2021 20:54:27 +0100
Subject: KVM: Resolve memslot ID via a hash table instead of via a static
 array

Memslot ID to the corresponding memslot mappings are currently kept as
indices in static id_to_index array.
The size of this array depends on the maximum allowed memslot count
(regardless of the number of memslots actually in use).

This has become especially problematic recently, when memslot count cap was
removed, so the maximum count is now full 32k memslots - the maximum
allowed by the current KVM API.

Keeping these IDs in a hash table (instead of an array) avoids this
problem.

Resolving a memslot ID to the actual memslot (instead of its index) will
also enable transitioning away from an array-based implementation of the
whole memslots structure in a later commit.

Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <117fb2c04320e6cd6cf34f205a72eadb0aa8d5f9.1638817640.git.maciej.szmigiero@oracle.com>
---
 include/linux/kvm_host.h | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2f80ce84fbcf..79db70a8323e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -29,6 +29,7 @@
 #include <linux/refcount.h>
 #include <linux/nospec.h>
 #include <linux/notifier.h>
+#include <linux/hashtable.h>
 #include <linux/xarray.h>
 #include <asm/signal.h>
 
@@ -426,6 +427,7 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
 #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
 
 struct kvm_memory_slot {
+	struct hlist_node id_node;
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long *dirty_bitmap;
@@ -527,8 +529,15 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
  */
 struct kvm_memslots {
 	u64 generation;
-	/* The mapping table from slot id to the index in memslots[]. */
-	short id_to_index[KVM_MEM_SLOTS_NUM];
+	/*
+	 * The mapping table from slot id to the index in memslots[].
+	 *
+	 * 7-bit bucket count matches the size of the old id to index array for
+	 * 512 slots, while giving good performance with this slot count.
+	 * Higher bucket counts bring only small performance improvements but
+	 * always result in higher memory usage (even for lower memslot counts).
+	 */
+	DECLARE_HASHTABLE(id_hash, 7);
 	atomic_t last_used_slot;
 	int used_slots;
 	struct kvm_memory_slot memslots[];
@@ -796,16 +805,14 @@ static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu)
 static inline
 struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id)
 {
-	int index = slots->id_to_index[id];
 	struct kvm_memory_slot *slot;
 
-	if (index < 0)
-		return NULL;
-
-	slot = &slots->memslots[index];
+	hash_for_each_possible(slots->id_hash, slot, id_node, id) {
+		if (slot->id == id)
+			return slot;
+	}
 
-	WARN_ON(slot->id != id);
-	return slot;
+	return NULL;
 }
 
 /*
-- 
cgit v1.2.3


From ed922739c9199bf515a3e7fec3e319ce1edeef2a Mon Sep 17 00:00:00 2001
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
Date: Mon, 6 Dec 2021 20:54:28 +0100
Subject: KVM: Use interval tree to do fast hva lookup in memslots

The current memslots implementation only allows quick binary search by gfn,
quick lookup by hva is not possible - the implementation has to do a linear
scan of the whole memslots array, even though the operation being performed
might apply just to a single memslot.

This significantly hurts performance of per-hva operations with higher
memslot counts.

Since hva ranges can overlap between memslots an interval tree is needed
for tracking them.

[sean: handle interval tree updates in kvm_replace_memslot()]
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <d66b9974becaa9839be9c4e1a5de97b177b4ac20.1638817640.git.maciej.szmigiero@oracle.com>
---
 include/linux/kvm_host.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 79db70a8323e..9552ad6d6652 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -30,6 +30,7 @@
 #include <linux/nospec.h>
 #include <linux/notifier.h>
 #include <linux/hashtable.h>
+#include <linux/interval_tree.h>
 #include <linux/xarray.h>
 #include <asm/signal.h>
 
@@ -428,6 +429,7 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
 
 struct kvm_memory_slot {
 	struct hlist_node id_node;
+	struct interval_tree_node hva_node;
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long *dirty_bitmap;
@@ -529,6 +531,7 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
  */
 struct kvm_memslots {
 	u64 generation;
+	struct rb_root_cached hva_tree;
 	/*
 	 * The mapping table from slot id to the index in memslots[].
 	 *
-- 
cgit v1.2.3


From a54d806688fe1e482350ce759a8a0fc9ebf814b0 Mon Sep 17 00:00:00 2001
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
Date: Mon, 6 Dec 2021 20:54:30 +0100
Subject: KVM: Keep memslots in tree-based structures instead of array-based
 ones

The current memslot code uses a (reverse gfn-ordered) memslot array for
keeping track of them.

Because the memslot array that is currently in use cannot be modified
every memslot management operation (create, delete, move, change flags)
has to make a copy of the whole array so it has a scratch copy to work on.

Strictly speaking, however, it is only necessary to make copy of the
memslot that is being modified, copying all the memslots currently present
is just a limitation of the array-based memslot implementation.

Two memslot sets, however, are still needed so the VM continues to run
on the currently active set while the requested operation is being
performed on the second, currently inactive one.

In order to have two memslot sets, but only one copy of actual memslots
it is necessary to split out the memslot data from the memslot sets.

The memslots themselves should be also kept independent of each other
so they can be individually added or deleted.

These two memslot sets should normally point to the same set of
memslots. They can, however, be desynchronized when performing a
memslot management operation by replacing the memslot to be modified
by its copy.  After the operation is complete, both memslot sets once
again point to the same, common set of memslot data.

This commit implements the aforementioned idea.

For tracking of gfns an ordinary rbtree is used since memslots cannot
overlap in the guest address space and so this data structure is
sufficient for ensuring that lookups are done quickly.

The "last used slot" mini-caches (both per-slot set one and per-vCPU one),
that keep track of the last found-by-gfn memslot, are still present in the
new code.

Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <17c0cf3663b760a0d3753d4ac08c0753e941b811.1638817641.git.maciej.szmigiero@oracle.com>
---
 include/linux/kvm_host.h | 143 +++++++++++++++++++++++------------------------
 1 file changed, 71 insertions(+), 72 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9552ad6d6652..9eda8a63feae 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -31,6 +31,7 @@
 #include <linux/notifier.h>
 #include <linux/hashtable.h>
 #include <linux/interval_tree.h>
+#include <linux/rbtree.h>
 #include <linux/xarray.h>
 #include <asm/signal.h>
 
@@ -358,11 +359,13 @@ struct kvm_vcpu {
 	struct kvm_dirty_ring dirty_ring;
 
 	/*
-	 * The index of the most recently used memslot by this vCPU. It's ok
-	 * if this becomes stale due to memslot changes since we always check
-	 * it is a valid slot.
+	 * The most recently used memslot by this vCPU and the slots generation
+	 * for which it is valid.
+	 * No wraparound protection is needed since generations won't overflow in
+	 * thousands of years, even assuming 1M memslot operations per second.
 	 */
-	int last_used_slot;
+	struct kvm_memory_slot *last_used_slot;
+	u64 last_used_slot_gen;
 };
 
 /* must be called with irqs disabled */
@@ -427,9 +430,26 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
  */
 #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
 
+/*
+ * Since at idle each memslot belongs to two memslot sets it has to contain
+ * two embedded nodes for each data structure that it forms a part of.
+ *
+ * Two memslot sets (one active and one inactive) are necessary so the VM
+ * continues to run on one memslot set while the other is being modified.
+ *
+ * These two memslot sets normally point to the same set of memslots.
+ * They can, however, be desynchronized when performing a memslot management
+ * operation by replacing the memslot to be modified by its copy.
+ * After the operation is complete, both memslot sets once again point to
+ * the same, common set of memslot data.
+ *
+ * The memslots themselves are independent of each other so they can be
+ * individually added or deleted.
+ */
 struct kvm_memory_slot {
-	struct hlist_node id_node;
-	struct interval_tree_node hva_node;
+	struct hlist_node id_node[2];
+	struct interval_tree_node hva_node[2];
+	struct rb_node gfn_node[2];
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long *dirty_bitmap;
@@ -524,16 +544,13 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
 }
 #endif
 
-/*
- * Note:
- * memslots are not sorted by id anymore, please use id_to_memslot()
- * to get the memslot by its id.
- */
 struct kvm_memslots {
 	u64 generation;
+	atomic_long_t last_used_slot;
 	struct rb_root_cached hva_tree;
+	struct rb_root gfn_tree;
 	/*
-	 * The mapping table from slot id to the index in memslots[].
+	 * The mapping table from slot id to memslot.
 	 *
 	 * 7-bit bucket count matches the size of the old id to index array for
 	 * 512 slots, while giving good performance with this slot count.
@@ -541,9 +558,7 @@ struct kvm_memslots {
 	 * always result in higher memory usage (even for lower memslot counts).
 	 */
 	DECLARE_HASHTABLE(id_hash, 7);
-	atomic_t last_used_slot;
-	int used_slots;
-	struct kvm_memory_slot memslots[];
+	int node_idx;
 };
 
 struct kvm {
@@ -565,6 +580,9 @@ struct kvm {
 	struct mutex slots_arch_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	unsigned long nr_memslot_pages;
+	/* The two memslot sets - active and inactive (per address space) */
+	struct kvm_memslots __memslots[KVM_ADDRESS_SPACE_NUM][2];
+	/* The current active memslot set for each address space */
 	struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
 	struct xarray vcpu_array;
 
@@ -739,11 +757,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
 	return NULL;
 }
 
-#define kvm_for_each_memslot(memslot, slots)				\
-	for (memslot = &slots->memslots[0];				\
-	     memslot < slots->memslots + slots->used_slots; memslot++)	\
-		if (WARN_ON_ONCE(!memslot->npages)) {			\
-		} else
+static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
+{
+	return vcpu->vcpu_idx;
+}
 
 void kvm_destroy_vcpus(struct kvm *kvm);
 
@@ -805,12 +822,23 @@ static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu)
 	return __kvm_memslots(vcpu->kvm, as_id);
 }
 
+static inline bool kvm_memslots_empty(struct kvm_memslots *slots)
+{
+	return RB_EMPTY_ROOT(&slots->gfn_tree);
+}
+
+#define kvm_for_each_memslot(memslot, bkt, slots)			      \
+	hash_for_each(slots->id_hash, bkt, memslot, id_node[slots->node_idx]) \
+		if (WARN_ON_ONCE(!memslot->npages)) {			      \
+		} else
+
 static inline
 struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id)
 {
 	struct kvm_memory_slot *slot;
+	int idx = slots->node_idx;
 
-	hash_for_each_possible(slots->id_hash, slot, id_node, id) {
+	hash_for_each_possible(slots->id_hash, slot, id_node[idx], id) {
 		if (slot->id == id)
 			return slot;
 	}
@@ -1214,25 +1242,15 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
 
 /*
- * Returns a pointer to the memslot at slot_index if it contains gfn.
+ * Returns a pointer to the memslot if it contains gfn.
  * Otherwise returns NULL.
  */
 static inline struct kvm_memory_slot *
-try_get_memslot(struct kvm_memslots *slots, int slot_index, gfn_t gfn)
+try_get_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
 {
-	struct kvm_memory_slot *slot;
-
-	if (slot_index < 0 || slot_index >= slots->used_slots)
+	if (!slot)
 		return NULL;
 
-	/*
-	 * slot_index can come from vcpu->last_used_slot which is not kept
-	 * in sync with userspace-controllable memslot deletion. So use nospec
-	 * to prevent the CPU from speculating past the end of memslots[].
-	 */
-	slot_index = array_index_nospec(slot_index, slots->used_slots);
-	slot = &slots->memslots[slot_index];
-
 	if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages)
 		return slot;
 	else
@@ -1240,65 +1258,46 @@ try_get_memslot(struct kvm_memslots *slots, int slot_index, gfn_t gfn)
 }
 
 /*
- * Returns a pointer to the memslot that contains gfn and records the index of
- * the slot in index. Otherwise returns NULL.
+ * Returns a pointer to the memslot that contains gfn. Otherwise returns NULL.
  *
  * With "approx" set returns the memslot also when the address falls
  * in a hole. In that case one of the memslots bordering the hole is
  * returned.
- *
- * IMPORTANT: Slots are sorted from highest GFN to lowest GFN!
  */
 static inline struct kvm_memory_slot *
-search_memslots(struct kvm_memslots *slots, gfn_t gfn, int *index, bool approx)
+search_memslots(struct kvm_memslots *slots, gfn_t gfn, bool approx)
 {
-	int start = 0, end = slots->used_slots;
-	struct kvm_memory_slot *memslots = slots->memslots;
 	struct kvm_memory_slot *slot;
-
-	if (unlikely(!slots->used_slots))
-		return NULL;
-
-	while (start < end) {
-		int slot = start + (end - start) / 2;
-
-		if (gfn >= memslots[slot].base_gfn)
-			end = slot;
-		else
-			start = slot + 1;
-	}
-
-	if (approx && start >= slots->used_slots) {
-		*index = slots->used_slots - 1;
-		return &memslots[slots->used_slots - 1];
-	}
-
-	slot = try_get_memslot(slots, start, gfn);
-	if (slot) {
-		*index = start;
-		return slot;
-	}
-	if (approx) {
-		*index = start;
-		return &memslots[start];
+	struct rb_node *node;
+	int idx = slots->node_idx;
+
+	slot = NULL;
+	for (node = slots->gfn_tree.rb_node; node; ) {
+		slot = container_of(node, struct kvm_memory_slot, gfn_node[idx]);
+		if (gfn >= slot->base_gfn) {
+			if (gfn < slot->base_gfn + slot->npages)
+				return slot;
+			node = node->rb_right;
+		} else
+			node = node->rb_left;
 	}
 
-	return NULL;
+	return approx ? slot : NULL;
 }
 
 static inline struct kvm_memory_slot *
 ____gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn, bool approx)
 {
 	struct kvm_memory_slot *slot;
-	int slot_index = atomic_read(&slots->last_used_slot);
 
-	slot = try_get_memslot(slots, slot_index, gfn);
+	slot = (struct kvm_memory_slot *)atomic_long_read(&slots->last_used_slot);
+	slot = try_get_memslot(slot, gfn);
 	if (slot)
 		return slot;
 
-	slot = search_memslots(slots, gfn, &slot_index, approx);
+	slot = search_memslots(slots, gfn, approx);
 	if (slot) {
-		atomic_set(&slots->last_used_slot, slot_index);
+		atomic_long_set(&slots->last_used_slot, (unsigned long)slot);
 		return slot;
 	}
 
-- 
cgit v1.2.3


From f4209439b522432d140d33393d4a3f12e695527b Mon Sep 17 00:00:00 2001
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
Date: Mon, 6 Dec 2021 20:54:32 +0100
Subject: KVM: Optimize gfn lookup in kvm_zap_gfn_range()

Introduce a memslots gfn upper bound operation and use it to optimize
kvm_zap_gfn_range().
This way this handler can do a quick lookup for intersecting gfns and won't
have to do a linear scan of the whole memslot set.

Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <ef242146a87a335ee93b441dcf01665cb847c902.1638817641.git.maciej.szmigiero@oracle.com>
---
 include/linux/kvm_host.h | 94 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9eda8a63feae..3bc98497e796 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -846,6 +846,100 @@ struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id)
 	return NULL;
 }
 
+/* Iterator used for walking memslots that overlap a gfn range. */
+struct kvm_memslot_iter {
+	struct kvm_memslots *slots;
+	struct rb_node *node;
+	struct kvm_memory_slot *slot;
+};
+
+static inline void kvm_memslot_iter_next(struct kvm_memslot_iter *iter)
+{
+	iter->node = rb_next(iter->node);
+	if (!iter->node)
+		return;
+
+	iter->slot = container_of(iter->node, struct kvm_memory_slot, gfn_node[iter->slots->node_idx]);
+}
+
+static inline void kvm_memslot_iter_start(struct kvm_memslot_iter *iter,
+					  struct kvm_memslots *slots,
+					  gfn_t start)
+{
+	int idx = slots->node_idx;
+	struct rb_node *tmp;
+	struct kvm_memory_slot *slot;
+
+	iter->slots = slots;
+
+	/*
+	 * Find the so called "upper bound" of a key - the first node that has
+	 * its key strictly greater than the searched one (the start gfn in our case).
+	 */
+	iter->node = NULL;
+	for (tmp = slots->gfn_tree.rb_node; tmp; ) {
+		slot = container_of(tmp, struct kvm_memory_slot, gfn_node[idx]);
+		if (start < slot->base_gfn) {
+			iter->node = tmp;
+			tmp = tmp->rb_left;
+		} else {
+			tmp = tmp->rb_right;
+		}
+	}
+
+	/*
+	 * Find the slot with the lowest gfn that can possibly intersect with
+	 * the range, so we'll ideally have slot start <= range start
+	 */
+	if (iter->node) {
+		/*
+		 * A NULL previous node means that the very first slot
+		 * already has a higher start gfn.
+		 * In this case slot start > range start.
+		 */
+		tmp = rb_prev(iter->node);
+		if (tmp)
+			iter->node = tmp;
+	} else {
+		/* a NULL node below means no slots */
+		iter->node = rb_last(&slots->gfn_tree);
+	}
+
+	if (iter->node) {
+		iter->slot = container_of(iter->node, struct kvm_memory_slot, gfn_node[idx]);
+
+		/*
+		 * It is possible in the slot start < range start case that the
+		 * found slot ends before or at range start (slot end <= range start)
+		 * and so it does not overlap the requested range.
+		 *
+		 * In such non-overlapping case the next slot (if it exists) will
+		 * already have slot start > range start, otherwise the logic above
+		 * would have found it instead of the current slot.
+		 */
+		if (iter->slot->base_gfn + iter->slot->npages <= start)
+			kvm_memslot_iter_next(iter);
+	}
+}
+
+static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_t end)
+{
+	if (!iter->node)
+		return false;
+
+	/*
+	 * If this slot starts beyond or at the end of the range so does
+	 * every next one
+	 */
+	return iter->slot->base_gfn < end;
+}
+
+/* Iterate over each memslot at least partially intersecting [start, end) range */
+#define kvm_for_each_memslot_in_gfn_range(iter, slots, start, end)	\
+	for (kvm_memslot_iter_start(iter, slots, start);		\
+	     kvm_memslot_iter_is_valid(iter, end);			\
+	     kvm_memslot_iter_next(iter))
+
 /*
  * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
  * - create a new memory slot
-- 
cgit v1.2.3


From 8283e36abfff507c64fe8289ac30ea7ab59648aa Mon Sep 17 00:00:00 2001
From: Ben Gardon <bgardon@google.com>
Date: Mon, 15 Nov 2021 15:45:58 -0800
Subject: KVM: x86/mmu: Propagate memslot const qualifier

In preparation for implementing in-place hugepage promotion, various
functions will need to be called from zap_collapsible_spte_range, which
has the const qualifier on its memslot argument. Propagate the const
qualifier to the various functions which will be needed. This just serves
to simplify the following patch.

No functional change intended.

Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20211115234603.2908381-11-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3bc98497e796..3eb7695aaa73 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -460,7 +460,7 @@ struct kvm_memory_slot {
 	u16 as_id;
 };
 
-static inline bool kvm_slot_dirty_track_enabled(struct kvm_memory_slot *slot)
+static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot)
 {
 	return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
 }
@@ -994,9 +994,9 @@ void kvm_set_page_accessed(struct page *page);
 kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 		      bool *writable);
-kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
-kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
-kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
+kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn);
+kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn);
+kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
 			       bool atomic, bool *async, bool write_fault,
 			       bool *writable, hva_t *hva);
 
@@ -1073,7 +1073,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
 bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
-void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn);
+void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
 struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
-- 
cgit v1.2.3


From 510958e997217e39a16b47afb5a44dfa39013964 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 8 Oct 2021 19:11:57 -0700
Subject: KVM: Force PPC to define its own rcuwait object

Do not define/reference kvm_vcpu.wait if __KVM_HAVE_ARCH_WQP is true, and
instead force the architecture (PPC) to define its own rcuwait object.
Allowing common KVM to directly access vcpu->wait without a guard makes
it all too easy to introduce potential bugs, e.g. kvm_vcpu_block(),
kvm_vcpu_on_spin(), and async_pf_execute() all operate on vcpu->wait, not
the result of kvm_arch_vcpu_get_wait(), and so may do the wrong thing for
PPC.

Due to PPC's shenanigans with respect to callbacks and waits (it switches
to the virtual core's wait object at KVM_RUN!?!?), it's not clear whether
or not this fixes any bugs.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20211009021236.4122790-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3eb7695aaa73..afacbfb2e482 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -314,7 +314,9 @@ struct kvm_vcpu {
 	struct mutex mutex;
 	struct kvm_run *run;
 
+#ifndef __KVM_HAVE_ARCH_WQP
 	struct rcuwait wait;
+#endif
 	struct pid __rcu *pid;
 	int sigset_active;
 	sigset_t sigset;
-- 
cgit v1.2.3


From 91b99ea7065786d0bff1c9281b002455dbaeb08b Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 8 Oct 2021 19:12:06 -0700
Subject: KVM: Rename kvm_vcpu_block() => kvm_vcpu_halt()

Rename kvm_vcpu_block() to kvm_vcpu_halt() in preparation for splitting
the actual "block" sequences into a separate helper (to be named
kvm_vcpu_block()).  x86 will use the standalone block-only path to handle
non-halt cases where the vCPU is not runnable.

Rename block_ns to halt_ns to match the new function name.

No functional change intended.

Reviewed-by: David Matlack <dmatlack@google.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20211009021236.4122790-14-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index afacbfb2e482..ea3c22d55d56 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1102,7 +1102,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
 void kvm_sigset_activate(struct kvm_vcpu *vcpu);
 void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);
 
-void kvm_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_vcpu_halt(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
-- 
cgit v1.2.3


From fac4268894394213127e43856f41d10f29131e69 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 8 Oct 2021 19:12:07 -0700
Subject: KVM: Split out a kvm_vcpu_block() helper from kvm_vcpu_halt()

Factor out the "block" part of kvm_vcpu_halt() so that x86 can emulate
non-halt wait/sleep/block conditions that should not be subjected to
halt-polling.

No functional change intended.

Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20211009021236.4122790-15-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ea3c22d55d56..bd13c5b5bd1d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1103,6 +1103,7 @@ void kvm_sigset_activate(struct kvm_vcpu *vcpu);
 void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);
 
 void kvm_vcpu_halt(struct kvm_vcpu *vcpu);
+bool kvm_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
-- 
cgit v1.2.3


From c3858335c711569b82a234a560dc19247e8f3fcc Mon Sep 17 00:00:00 2001
From: Jing Zhang <jingzhangos@google.com>
Date: Fri, 8 Oct 2021 19:12:08 -0700
Subject: KVM: stats: Add stat to detect if vcpu is currently blocking

Add a "blocking" stat that userspace can use to detect the case where a
vCPU is not being run because of an vCPU/guest action, e.g. HLT or WFS on
x86, WFI on arm64, etc...  Current guest/host/halt stats don't show this
well, e.g. if a guest halts for a long period of time then the vCPU could
could appear pathologically blocked due to a host condition, when in
reality the vCPU has been put into a not-runnable state by the guest.

Originally-by: Cannon Matthews <cannonmatthews@google.com>
Suggested-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Signed-off-by: Jing Zhang <jingzhangos@google.com>
[sean: renamed stat to "blocking", massaged changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20211009021236.4122790-16-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h  | 3 ++-
 include/linux/kvm_types.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bd13c5b5bd1d..dc7740cafea7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1587,7 +1587,8 @@ struct _kvm_stats_desc {
 	STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_hist,	       \
 			HALT_POLL_HIST_COUNT),				       \
 	STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist,	       \
-			HALT_POLL_HIST_COUNT)
+			HALT_POLL_HIST_COUNT),				       \
+	STATS_DESC_ICOUNTER(VCPU_GENERIC, blocking)
 
 extern struct dentry *kvm_debugfs_dir;
 
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 234eab059839..888ef12862c9 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -87,6 +87,7 @@ struct kvm_vcpu_stat_generic {
 	u64 halt_poll_success_hist[HALT_POLL_HIST_COUNT];
 	u64 halt_poll_fail_hist[HALT_POLL_HIST_COUNT];
 	u64 halt_wait_hist[HALT_POLL_HIST_COUNT];
+	u64 blocking;
 };
 
 #define KVM_STATS_NAME_SIZE	48
-- 
cgit v1.2.3


From d92a5d1c6c757f659ffb9c2c2e65fcf3d571c14e Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 8 Oct 2021 19:12:12 -0700
Subject: KVM: Add helpers to wake/query blocking vCPU

Add helpers to wake and query a blocking vCPU.  In addition to providing
nice names, the helpers reduce the probability of KVM neglecting to use
kvm_arch_vcpu_get_wait().

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20211009021236.4122790-20-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index dc7740cafea7..f8ed799e8674 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1286,6 +1286,20 @@ static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu)
 #endif
 }
 
+/*
+ * Wake a vCPU if necessary, but don't do any stats/metadata updates.  Returns
+ * true if the vCPU was blocking and was awakened, false otherwise.
+ */
+static inline bool __kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
+{
+	return !!rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
+}
+
+static inline bool kvm_vcpu_is_blocking(struct kvm_vcpu *vcpu)
+{
+	return rcuwait_active(kvm_arch_vcpu_get_wait(vcpu));
+}
+
 #ifdef __KVM_HAVE_ARCH_INTC_INITIALIZED
 /*
  * returns true if the virtual interrupt controller is initialized and
-- 
cgit v1.2.3


From 265d27caf95f8959ddd4a33e52c4b4dc4e31c308 Mon Sep 17 00:00:00 2001
From: Kavyasree Kotagiri <kavyasree.kotagiri@microchip.com>
Date: Wed, 3 Nov 2021 11:49:33 +0530
Subject: dt-bindings: clock: lan966x: Add binding includes for lan966x SoC
 clock IDs

LAN966X supports 14 clock outputs for its peripherals.
This include file is introduced to use identifiers for clocks.

Signed-off-by: Kavyasree Kotagiri <kavyasree.kotagiri@microchip.com>
Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20211103061935.25677-2-kavyasree.kotagiri@microchip.com
---
 include/dt-bindings/clock/microchip,lan966x.h | 28 +++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 include/dt-bindings/clock/microchip,lan966x.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/microchip,lan966x.h b/include/dt-bindings/clock/microchip,lan966x.h
new file mode 100644
index 000000000000..fe36ed6d8b5f
--- /dev/null
+++ b/include/dt-bindings/clock/microchip,lan966x.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2021 Microchip Inc.
+ *
+ * Author: Kavyasree Kotagiri <kavyasree.kotagiri@microchip.com>
+ */
+
+#ifndef _DT_BINDINGS_CLK_LAN966X_H
+#define _DT_BINDINGS_CLK_LAN966X_H
+
+#define GCK_ID_QSPI0		0
+#define GCK_ID_QSPI1		1
+#define GCK_ID_QSPI2		2
+#define GCK_ID_SDMMC0		3
+#define GCK_ID_PI		4
+#define GCK_ID_MCAN0		5
+#define GCK_ID_MCAN1		6
+#define GCK_ID_FLEXCOM0		7
+#define GCK_ID_FLEXCOM1		8
+#define GCK_ID_FLEXCOM2		9
+#define GCK_ID_FLEXCOM3		10
+#define GCK_ID_FLEXCOM4		11
+#define GCK_ID_TIMER		12
+#define GCK_ID_USB_REFCLK	13
+
+#define N_CLOCKS		14
+
+#endif
-- 
cgit v1.2.3


From 815f0e738a8d5663a02350e2580706829144a722 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Wed, 3 Nov 2021 09:50:59 +0100
Subject: clk: gate: Add devm_clk_hw_register_gate()

Add devm_clk_hw_register_gate() - devres-managed version of
clk_hw_register_gate()

Suggested-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20211103085102.1656081-2-horatiu.vultur@microchip.com
---
 include/linux/clk-provider.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index f59c875271a0..2faa6f7aa8a8 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -490,6 +490,13 @@ struct clk_hw *__clk_hw_register_gate(struct device *dev,
 		unsigned long flags,
 		void __iomem *reg, u8 bit_idx,
 		u8 clk_gate_flags, spinlock_t *lock);
+struct clk_hw *__devm_clk_hw_register_gate(struct device *dev,
+		struct device_node *np, const char *name,
+		const char *parent_name, const struct clk_hw *parent_hw,
+		const struct clk_parent_data *parent_data,
+		unsigned long flags,
+		void __iomem *reg, u8 bit_idx,
+		u8 clk_gate_flags, spinlock_t *lock);
 struct clk *clk_register_gate(struct device *dev, const char *name,
 		const char *parent_name, unsigned long flags,
 		void __iomem *reg, u8 bit_idx,
@@ -544,6 +551,22 @@ struct clk *clk_register_gate(struct device *dev, const char *name,
 	__clk_hw_register_gate((dev), NULL, (name), NULL, NULL, (parent_data), \
 			       (flags), (reg), (bit_idx),		      \
 			       (clk_gate_flags), (lock))
+/**
+ * devm_clk_hw_register_gate - register a gate clock with the clock framework
+ * @dev: device that is registering this clock
+ * @name: name of this clock
+ * @parent_name: name of this clock's parent
+ * @flags: framework-specific flags for this clock
+ * @reg: register address to control gating of this clock
+ * @bit_idx: which bit in the register controls gating of this clock
+ * @clk_gate_flags: gate-specific flags for this clock
+ * @lock: shared register lock for this clock
+ */
+#define devm_clk_hw_register_gate(dev, name, parent_name, flags, reg, bit_idx,\
+				  clk_gate_flags, lock)			      \
+	__devm_clk_hw_register_gate((dev), NULL, (name), (parent_name), NULL, \
+			       NULL, (flags), (reg), (bit_idx),		      \
+			       (clk_gate_flags), (lock))
 void clk_unregister_gate(struct clk *clk);
 void clk_hw_unregister_gate(struct clk_hw *hw);
 int clk_gate_is_enabled(struct clk_hw *hw);
-- 
cgit v1.2.3


From 51d0a37dde9b63111d14a59bb77c5cf0273e4c9e Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Wed, 3 Nov 2021 09:51:01 +0100
Subject: dt-bindings: clock: lan966x: Extend includes with clock gates

On lan966x it is allow to control the clock to some peripherals like
USB. So extend the include file with these clocks.

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20211103085102.1656081-4-horatiu.vultur@microchip.com
---
 include/dt-bindings/clock/microchip,lan966x.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/microchip,lan966x.h b/include/dt-bindings/clock/microchip,lan966x.h
index fe36ed6d8b5f..6f9d43d76d5a 100644
--- a/include/dt-bindings/clock/microchip,lan966x.h
+++ b/include/dt-bindings/clock/microchip,lan966x.h
@@ -23,6 +23,12 @@
 #define GCK_ID_TIMER		12
 #define GCK_ID_USB_REFCLK	13
 
-#define N_CLOCKS		14
+/* Gate clocks */
+#define GCK_GATE_UHPHS		14
+#define GCK_GATE_UDPHS		15
+#define GCK_GATE_MCRAMC		16
+#define GCK_GATE_HMATRIX	17
+
+#define N_CLOCKS		18
 
 #endif
-- 
cgit v1.2.3


From 7cfa3d00730a4c0694b55fb1974823baeab8815b Mon Sep 17 00:00:00 2001
From: Shuming Fan <shumingf@realtek.com>
Date: Wed, 8 Dec 2021 18:17:18 +0800
Subject: ASoC: rt5682s: add delay time to fix pop sound issue

There is a pop noise at the beginning of the capture data.
This patch adds the delay time before stereo1 ADC unmute to fix the pop sound issue.

Signed-off-by: Shuming Fan <shumingf@realtek.com>
Link: https://lore.kernel.org/r/20211208101718.28945-1-shumingf@realtek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/rt5682s.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/sound/rt5682s.h b/include/sound/rt5682s.h
index accfbc2dcdd2..f18d91308b9a 100644
--- a/include/sound/rt5682s.h
+++ b/include/sound/rt5682s.h
@@ -40,6 +40,7 @@ struct rt5682s_platform_data {
 	enum rt5682s_jd_src jd_src;
 	unsigned int dmic_clk_rate;
 	unsigned int dmic_delay;
+	unsigned int amic_delay;
 	bool dmic_clk_driving_high;
 
 	const char *dai_clk_names[RT5682S_DAI_NUM_CLKS];
-- 
cgit v1.2.3


From 444dd878e85fb33fcfb2682cfdab4c236f33ea3e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 3 Dec 2021 17:19:47 +0100
Subject: PM: runtime: Fix pm_runtime_active() kerneldoc comment

The kerneldoc comment of pm_runtime_active() does not reflect the
behavior of the function, so update it accordingly.

Fixes: 403d2d116ec0 ("PM: runtime: Add kerneldoc comments to multiple helpers")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/pm_runtime.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 222da43b7096..eddd66d426ca 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -129,7 +129,7 @@ static inline bool pm_runtime_suspended(struct device *dev)
  * pm_runtime_active - Check whether or not a device is runtime-active.
  * @dev: Target device.
  *
- * Return %true if runtime PM is enabled for @dev and its runtime PM status is
+ * Return %true if runtime PM is disabled for @dev or its runtime PM status is
  * %RPM_ACTIVE, or %false otherwise.
  *
  * Note that the return value of this function can only be trusted if it is
-- 
cgit v1.2.3


From 74d9555580c48a04b2c3b742dfb0c80777aa0b26 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Mon, 8 Nov 2021 16:09:41 +0000
Subject: PM: hibernate: Allow ACPI hardware signature to be honoured

Theoretically, when the hardware signature in FACS changes, the OS
is supposed to gracefully decline to attempt to resume from S4:

 "If the signature has changed, OSPM will not restore the system
  context and can boot from scratch"

In practice, Windows doesn't do this and many laptop vendors do allow
the signature to change especially when docking/undocking, so it would
be a bad idea to simply comply with the specification by default in the
general case.

However, there are use cases where we do want the compliant behaviour
and we know it's safe. Specifically, when resuming virtual machines where
we know the hypervisor has changed sufficiently that resume will fail.
We really want to be able to *tell* the guest kernel not to try, so it
boots cleanly and doesn't just crash. This patch provides a way to opt
in to the spec-compliant behaviour on the command line.

A follow-up patch may do this automatically for certain "known good"
machines based on a DMI match, or perhaps just for all hypervisor
guests since there's no good reason a hypervisor would change the
hardware_signature that it exposes to guests *unless* it wants them
to obey the ACPI specification.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h    | 2 +-
 include/linux/suspend.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index b28f8790192a..6c0798db6bde 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -506,7 +506,7 @@ acpi_status acpi_release_memory(acpi_handle handle, struct resource *res,
 int acpi_resources_are_enforced(void);
 
 #ifdef CONFIG_HIBERNATION
-void __init acpi_no_s4_hw_signature(void);
+void __init acpi_check_s4_hw_signature(int check);
 #endif
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 8af13ba60c7e..5785d909c321 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -446,6 +446,7 @@ extern unsigned long get_safe_page(gfp_t gfp_mask);
 extern asmlinkage int swsusp_arch_suspend(void);
 extern asmlinkage int swsusp_arch_resume(void);
 
+extern u32 swsusp_hardware_signature;
 extern void hibernation_set_ops(const struct platform_hibernation_ops *ops);
 extern int hibernate(void);
 extern bool system_entering_hibernation(void);
-- 
cgit v1.2.3


From 8260b9820f7050461b8969305bbd8cb5654f0c74 Mon Sep 17 00:00:00 2001
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Date: Mon, 6 Dec 2021 16:55:03 +0300
Subject: x86/sev: Use CC_ATTR attribute to generalize string I/O unroll

INS/OUTS are not supported in TDX guests and cause #UD. Kernel has to
avoid them when running in TDX guest. To support existing usage, string
I/O operations are unrolled using IN/OUT instructions.

AMD SEV platform implements this support by adding unroll
logic in ins#bwl()/outs#bwl() macros with SEV-specific checks.
Since TDX VM guests will also need similar support, use
CC_ATTR_GUEST_UNROLL_STRING_IO and generic cc_platform_has() API to
implement it.

String I/O helpers were the last users of sev_key_active() interface and
sev_enable_key static key. Remove them.

 [ bp: Move comment too and do not delete it. ]

Suggested-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Tested-by: Tom Lendacky <thomas.lendacky@amd.com>
Link: https://lkml.kernel.org/r/20211206135505.75045-2-kirill.shutemov@linux.intel.com
---
 include/linux/cc_platform.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
index a075b70b9a70..efd8205282da 100644
--- a/include/linux/cc_platform.h
+++ b/include/linux/cc_platform.h
@@ -61,6 +61,17 @@ enum cc_attr {
 	 * Examples include SEV-ES.
 	 */
 	CC_ATTR_GUEST_STATE_ENCRYPT,
+
+	/**
+	 * @CC_ATTR_GUEST_UNROLL_STRING_IO: String I/O is implemented with
+	 *                                  IN/OUT instructions
+	 *
+	 * The platform/OS is running as a guest/virtual machine and uses
+	 * IN/OUT instructions in place of string I/O.
+	 *
+	 * Examples include TDX guest & SEV.
+	 */
+	CC_ATTR_GUEST_UNROLL_STRING_IO,
 };
 
 #ifdef CONFIG_ARCH_HAS_CC_PLATFORM
-- 
cgit v1.2.3


From 52407c220c44c8dcc6aa8aa35ffc8a2db3c849a9 Mon Sep 17 00:00:00 2001
From: Anusha Srivatsa <anusha.srivatsa@intel.com>
Date: Thu, 2 Dec 2021 22:35:43 -0800
Subject: drm/i915/rpl-s: Add PCI IDS for Raptor Lake S
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Raptor Lake S(RPL-S) is a version 12
Display, Media and Render. For all i915
purposes it is the same as Alder Lake S (ADL-S).

Introduce RPL-S as a subplatform
of ADL-S. This patch adds PCI ids for RPL-S.

BSpec: 53655
Cc: x86@kernel.org
Cc: dri-devel@lists.freedesktop.org
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com> # arch/x86
Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211203063545.2254380-2-anusha.srivatsa@intel.com
---
 include/drm/i915_pciids.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index c00ac54692d7..baf3d1d3d566 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -666,4 +666,13 @@
 	INTEL_VGA_DEVICE(0x46C2, info), \
 	INTEL_VGA_DEVICE(0x46C3, info)
 
+/* RPL-S */
+#define INTEL_RPLS_IDS(info) \
+	INTEL_VGA_DEVICE(0xA780, info), \
+	INTEL_VGA_DEVICE(0xA781, info), \
+	INTEL_VGA_DEVICE(0xA782, info), \
+	INTEL_VGA_DEVICE(0xA783, info), \
+	INTEL_VGA_DEVICE(0xA788, info), \
+	INTEL_VGA_DEVICE(0xA789, info)
+
 #endif /* _I915_PCIIDS_H */
-- 
cgit v1.2.3


From 3f9bb0301d50ce27421eff4b710c2bbe58111a83 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 6 Dec 2021 18:57:47 +0200
Subject: net: dsa: make dp->bridge_num one-based
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I have seen too many bugs already due to the fact that we must encode an
invalid dp->bridge_num as a negative value, because the natural tendency
is to check that invalid value using (!dp->bridge_num). Latest example
can be seen in commit 1bec0f05062c ("net: dsa: fix bridge_num not
getting cleared after ports leaving the bridge").

Convert the existing users to assume that dp->bridge_num == 0 is the
encoding for invalid, and valid bridge numbers start from 1.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Alvin Šipraga <alsi@bang-olufsen.dk>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/dsa/8021q.h | 6 +++---
 include/net/dsa.h         | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index 254b165f2b44..0af4371fbebb 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -38,13 +38,13 @@ void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id);
 
 int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port,
 					struct net_device *br,
-					int bridge_num);
+					unsigned int bridge_num);
 
 void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port,
 					   struct net_device *br,
-					   int bridge_num);
+					   unsigned int bridge_num);
 
-u16 dsa_8021q_bridge_tx_fwd_offload_vid(int bridge_num);
+u16 dsa_8021q_bridge_tx_fwd_offload_vid(unsigned int bridge_num);
 
 u16 dsa_tag_8021q_tx_vid(const struct dsa_port *dp);
 
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 8ca9d50cbbc2..a23cfbaa09d6 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -257,7 +257,7 @@ struct dsa_port {
 	bool			learning;
 	u8			stp_state;
 	struct net_device	*bridge_dev;
-	int			bridge_num;
+	unsigned int		bridge_num;
 	struct devlink_port	devlink_port;
 	bool			devlink_port_setup;
 	struct phylink		*pl;
@@ -754,11 +754,11 @@ struct dsa_switch_ops {
 	/* Called right after .port_bridge_join() */
 	int	(*port_bridge_tx_fwd_offload)(struct dsa_switch *ds, int port,
 					      struct net_device *bridge,
-					      int bridge_num);
+					      unsigned int bridge_num);
 	/* Called right before .port_bridge_leave() */
 	void	(*port_bridge_tx_fwd_unoffload)(struct dsa_switch *ds, int port,
 						struct net_device *bridge,
-						int bridge_num);
+						unsigned int bridge_num);
 	void	(*port_stp_state_set)(struct dsa_switch *ds, int port,
 				      u8 state);
 	void	(*port_fast_age)(struct dsa_switch *ds, int port);
-- 
cgit v1.2.3


From 947c8746e2c31bb469ba9ee02dc739be6a98ee38 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 6 Dec 2021 18:57:48 +0200
Subject: net: dsa: assign a bridge number even without TX forwarding offload
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The service where DSA assigns a unique bridge number for each forwarding
domain is useful even for drivers which do not implement the TX
forwarding offload feature.

For example, drivers might use the dp->bridge_num for FDB isolation.

So rename ds->num_fwd_offloading_bridges to ds->max_num_bridges, and
calculate a unique bridge_num for all drivers that set this value.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Alvin Šipraga <alsi@bang-olufsen.dk>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dsa.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index a23cfbaa09d6..00fbd87ae4ff 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -413,12 +413,12 @@ struct dsa_switch {
 	 */
 	unsigned int		num_lag_ids;
 
-	/* Drivers that support bridge forwarding offload should set this to
-	 * the maximum number of bridges spanning the same switch tree (or all
-	 * trees, in the case of cross-tree bridging support) that can be
-	 * offloaded.
+	/* Drivers that support bridge forwarding offload or FDB isolation
+	 * should set this to the maximum number of bridges spanning the same
+	 * switch tree (or all trees, in the case of cross-tree bridging
+	 * support) that can be offloaded.
 	 */
-	unsigned int		num_fwd_offloading_bridges;
+	unsigned int		max_num_bridges;
 
 	size_t num_ports;
 };
-- 
cgit v1.2.3


From 36cbf39b56908bb2e7e8e392e5f08895c3ca4326 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 6 Dec 2021 18:57:52 +0200
Subject: net: dsa: hide dp->bridge_dev and dp->bridge_num in the core behind
 helpers

The location of the bridge device pointer and number is going to change.
It is not going to be kept individually per port, but in a common
structure allocated dynamically and which will have lockdep validation.

Create helpers to access these elements so that we have a migration path
to the new organization.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dsa.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 00fbd87ae4ff..18bce0383267 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -599,6 +599,27 @@ struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp)
 	return dp->slave;
 }
 
+static inline struct net_device *
+dsa_port_bridge_dev_get(const struct dsa_port *dp)
+{
+	return dp->bridge_dev;
+}
+
+static inline unsigned int dsa_port_bridge_num_get(struct dsa_port *dp)
+{
+	return dp->bridge_num;
+}
+
+static inline bool dsa_port_bridge_same(const struct dsa_port *a,
+					const struct dsa_port *b)
+{
+	struct net_device *br_a = dsa_port_bridge_dev_get(a);
+	struct net_device *br_b = dsa_port_bridge_dev_get(b);
+
+	/* Standalone ports are not in the same bridge with one another */
+	return (!br_a || !br_b) ? false : (br_a == br_b);
+}
+
 typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid,
 			      bool is_static, void *data);
 struct dsa_switch_ops {
-- 
cgit v1.2.3


From 6a43cba3034015d1c029c8a81b62eb9c2660fd6e Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 6 Dec 2021 18:57:55 +0200
Subject: net: dsa: export bridging offload helpers to drivers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the static inline helpers from net/dsa/dsa_priv.h to
include/net/dsa.h, so that drivers can call functions such as
dsa_port_offloads_bridge_dev(), which will be necessary after the
transition to a more complex bridge structure.

More functions than are needed right now are being moved, but this is
done for uniformity.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Alvin Šipraga <alsi@bang-olufsen.dk>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dsa.h | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 18bce0383267..899e13d56fc2 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -620,6 +620,49 @@ static inline bool dsa_port_bridge_same(const struct dsa_port *a,
 	return (!br_a || !br_b) ? false : (br_a == br_b);
 }
 
+static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp,
+						 const struct net_device *dev)
+{
+	return dsa_port_to_bridge_port(dp) == dev;
+}
+
+static inline bool
+dsa_port_offloads_bridge_dev(struct dsa_port *dp,
+			     const struct net_device *bridge_dev)
+{
+	/* DSA ports connected to a bridge, and event was emitted
+	 * for the bridge.
+	 */
+	return dsa_port_bridge_dev_get(dp) == bridge_dev;
+}
+
+/* Returns true if any port of this tree offloads the given net_device */
+static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst,
+						 const struct net_device *dev)
+{
+	struct dsa_port *dp;
+
+	list_for_each_entry(dp, &dst->ports, list)
+		if (dsa_port_offloads_bridge_port(dp, dev))
+			return true;
+
+	return false;
+}
+
+/* Returns true if any port of this tree offloads the given bridge */
+static inline bool
+dsa_tree_offloads_bridge_dev(struct dsa_switch_tree *dst,
+			     const struct net_device *bridge_dev)
+{
+	struct dsa_port *dp;
+
+	list_for_each_entry(dp, &dst->ports, list)
+		if (dsa_port_offloads_bridge_dev(dp, bridge_dev))
+			return true;
+
+	return false;
+}
+
 typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid,
 			      bool is_static, void *data);
 struct dsa_switch_ops {
-- 
cgit v1.2.3


From d3eed0e57d5d1bcbf1bd60f83a4adfe7d7b8dd9c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 6 Dec 2021 18:57:56 +0200
Subject: net: dsa: keep the bridge_dev and bridge_num as part of the same
 structure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The main desire behind this is to provide coherent bridge information to
the fast path without locking.

For example, right now we set dp->bridge_dev and dp->bridge_num from
separate code paths, it is theoretically possible for a packet
transmission to read these two port properties consecutively and find a
bridge number which does not correspond with the bridge device.

Another desire is to start passing more complex bridge information to
dsa_switch_ops functions. For example, with FDB isolation, it is
expected that drivers will need to be passed the bridge which requested
an FDB/MDB entry to be offloaded, and along with that bridge_dev, the
associated bridge_num should be passed too, in case the driver might
want to implement an isolation scheme based on that number.

We already pass the {bridge_dev, bridge_num} pair to the TX forwarding
offload switch API, however we'd like to remove that and squash it into
the basic bridge join/leave API. So that means we need to pass this
pair to the bridge join/leave API.

During dsa_port_bridge_leave, first we unset dp->bridge_dev, then we
call the driver's .port_bridge_leave with what used to be our
dp->bridge_dev, but provided as an argument.

When bridge_dev and bridge_num get folded into a single structure, we
need to preserve this behavior in dsa_port_bridge_leave: we need a copy
of what used to be in dp->bridge.

Switch drivers check bridge membership by comparing dp->bridge_dev with
the provided bridge_dev, but now, if we provide the struct dsa_bridge as
a pointer, they cannot keep comparing dp->bridge to the provided
pointer, since this only points to an on-stack copy. To make this
obvious and prevent driver writers from forgetting and doing stupid
things, in this new API, the struct dsa_bridge is provided as a full
structure (not very large, contains an int and a pointer) instead of a
pointer. An explicit comparison function needs to be used to determine
bridge membership: dsa_port_offloads_bridge().

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Alvin Šipraga <alsi@bang-olufsen.dk>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/dsa/8021q.h |  7 +++----
 include/net/dsa.h         | 34 +++++++++++++++++++++-------------
 2 files changed, 24 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index 0af4371fbebb..939a1beaddf7 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -7,6 +7,7 @@
 
 #include <linux/refcount.h>
 #include <linux/types.h>
+#include <net/dsa.h>
 
 struct dsa_switch;
 struct dsa_port;
@@ -37,12 +38,10 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
 void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id);
 
 int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port,
-					struct net_device *br,
-					unsigned int bridge_num);
+					struct dsa_bridge bridge);
 
 void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port,
-					   struct net_device *br,
-					   unsigned int bridge_num);
+					   struct dsa_bridge bridge);
 
 u16 dsa_8021q_bridge_tx_fwd_offload_vid(unsigned int bridge_num);
 
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 899e13d56fc2..b9789c0cd5e3 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -219,6 +219,11 @@ struct dsa_mall_tc_entry {
 	};
 };
 
+struct dsa_bridge {
+	struct net_device *dev;
+	unsigned int num;
+	refcount_t refcount;
+};
 
 struct dsa_port {
 	/* A CPU port is physically connected to a master device.
@@ -256,8 +261,7 @@ struct dsa_port {
 	/* Managed by DSA on user ports and by drivers on CPU and DSA ports */
 	bool			learning;
 	u8			stp_state;
-	struct net_device	*bridge_dev;
-	unsigned int		bridge_num;
+	struct dsa_bridge	*bridge;
 	struct devlink_port	devlink_port;
 	bool			devlink_port_setup;
 	struct phylink		*pl;
@@ -588,7 +592,7 @@ static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp)
 static inline
 struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp)
 {
-	if (!dp->bridge_dev)
+	if (!dp->bridge)
 		return NULL;
 
 	if (dp->lag_dev)
@@ -602,12 +606,12 @@ struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp)
 static inline struct net_device *
 dsa_port_bridge_dev_get(const struct dsa_port *dp)
 {
-	return dp->bridge_dev;
+	return dp->bridge ? dp->bridge->dev : NULL;
 }
 
 static inline unsigned int dsa_port_bridge_num_get(struct dsa_port *dp)
 {
-	return dp->bridge_num;
+	return dp->bridge ? dp->bridge->num : 0;
 }
 
 static inline bool dsa_port_bridge_same(const struct dsa_port *a,
@@ -636,6 +640,12 @@ dsa_port_offloads_bridge_dev(struct dsa_port *dp,
 	return dsa_port_bridge_dev_get(dp) == bridge_dev;
 }
 
+static inline bool dsa_port_offloads_bridge(struct dsa_port *dp,
+					    const struct dsa_bridge *bridge)
+{
+	return dsa_port_bridge_dev_get(dp) == bridge->dev;
+}
+
 /* Returns true if any port of this tree offloads the given net_device */
 static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst,
 						 const struct net_device *dev)
@@ -812,17 +822,15 @@ struct dsa_switch_ops {
 	 */
 	int	(*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs);
 	int	(*port_bridge_join)(struct dsa_switch *ds, int port,
-				    struct net_device *bridge);
+				    struct dsa_bridge bridge);
 	void	(*port_bridge_leave)(struct dsa_switch *ds, int port,
-				     struct net_device *bridge);
+				     struct dsa_bridge bridge);
 	/* Called right after .port_bridge_join() */
 	int	(*port_bridge_tx_fwd_offload)(struct dsa_switch *ds, int port,
-					      struct net_device *bridge,
-					      unsigned int bridge_num);
+					      struct dsa_bridge bridge);
 	/* Called right before .port_bridge_leave() */
 	void	(*port_bridge_tx_fwd_unoffload)(struct dsa_switch *ds, int port,
-						struct net_device *bridge,
-						unsigned int bridge_num);
+						struct dsa_bridge bridge);
 	void	(*port_stp_state_set)(struct dsa_switch *ds, int port,
 				      u8 state);
 	void	(*port_fast_age)(struct dsa_switch *ds, int port);
@@ -894,10 +902,10 @@ struct dsa_switch_ops {
 	 */
 	int	(*crosschip_bridge_join)(struct dsa_switch *ds, int tree_index,
 					 int sw_index, int port,
-					 struct net_device *br);
+					 struct dsa_bridge bridge);
 	void	(*crosschip_bridge_leave)(struct dsa_switch *ds, int tree_index,
 					  int sw_index, int port,
-					  struct net_device *br);
+					  struct dsa_bridge bridge);
 	int	(*crosschip_lag_change)(struct dsa_switch *ds, int sw_index,
 					int port);
 	int	(*crosschip_lag_join)(struct dsa_switch *ds, int sw_index,
-- 
cgit v1.2.3


From b079922ba2acf072b23d82fa246a0d8de198f0a2 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 6 Dec 2021 18:57:57 +0200
Subject: net: dsa: add a "tx_fwd_offload" argument to ->port_bridge_join
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a preparation patch for the removal of the DSA switch methods
->port_bridge_tx_fwd_offload() and ->port_bridge_tx_fwd_unoffload().
The plan is for the switch to report whether it offloads TX forwarding
directly as a response to the ->port_bridge_join() method.

This change deals with the noisy portion of converting all existing
function prototypes to take this new boolean pointer argument.
The bool is placed in the cross-chip notifier structure for bridge join,
and a reference to it is provided to drivers. In the next change, DSA
will then actually look at this value instead of calling
->port_bridge_tx_fwd_offload().

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Alvin Šipraga <alsi@bang-olufsen.dk>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dsa.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index b9789c0cd5e3..584b3f9462a0 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -822,7 +822,8 @@ struct dsa_switch_ops {
 	 */
 	int	(*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs);
 	int	(*port_bridge_join)(struct dsa_switch *ds, int port,
-				    struct dsa_bridge bridge);
+				    struct dsa_bridge bridge,
+				    bool *tx_fwd_offload);
 	void	(*port_bridge_leave)(struct dsa_switch *ds, int port,
 				     struct dsa_bridge bridge);
 	/* Called right after .port_bridge_join() */
-- 
cgit v1.2.3


From 857fdd74fb38dad4d0333401fb1826cd8cf0df2c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 6 Dec 2021 18:57:58 +0200
Subject: net: dsa: eliminate dsa_switch_ops :: port_bridge_tx_fwd_{,un}offload
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We don't really need new switch API for these, and with new switches
which intend to add support for this feature, it will become cumbersome
to maintain.

The change consists in restructuring the two drivers that implement this
offload (sja1105 and mv88e6xxx) such that the offload is enabled and
disabled from the ->port_bridge_{join,leave} methods instead of the old
->port_bridge_tx_fwd_{,un}offload.

The only non-trivial change is that mv88e6xxx_map_virtual_bridge_to_pvt()
has been moved to avoid a forward declaration, and the
mv88e6xxx_reg_lock() calls from inside it have been removed, since
locking is now done from mv88e6xxx_port_bridge_{join,leave}.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Alvin Šipraga <alsi@bang-olufsen.dk>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dsa.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 584b3f9462a0..bdf308a5c55e 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -222,6 +222,7 @@ struct dsa_mall_tc_entry {
 struct dsa_bridge {
 	struct net_device *dev;
 	unsigned int num;
+	bool tx_fwd_offload;
 	refcount_t refcount;
 };
 
@@ -826,12 +827,6 @@ struct dsa_switch_ops {
 				    bool *tx_fwd_offload);
 	void	(*port_bridge_leave)(struct dsa_switch *ds, int port,
 				     struct dsa_bridge bridge);
-	/* Called right after .port_bridge_join() */
-	int	(*port_bridge_tx_fwd_offload)(struct dsa_switch *ds, int port,
-					      struct dsa_bridge bridge);
-	/* Called right before .port_bridge_leave() */
-	void	(*port_bridge_tx_fwd_unoffload)(struct dsa_switch *ds, int port,
-						struct dsa_bridge bridge);
 	void	(*port_stp_state_set)(struct dsa_switch *ds, int port,
 				      u8 state);
 	void	(*port_fast_age)(struct dsa_switch *ds, int port);
-- 
cgit v1.2.3


From 283e6f5a8166f075eba78da6b867d76cc5d47e77 Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 7 Dec 2021 12:21:40 +0300
Subject: net: wwan: make debugfs optional

Debugfs interface is optional for the regular modem use. Some distros
and users will want to disable this feature for security or kernel
size reasons. So add a configuration option that allows to completely
disable the debugfs interface of the WWAN devices.

A primary considered use case for this option was embedded firmwares.
For example, in OpenWrt, you can not completely disable debugfs, as a
lot of wireless stuff can only be configured and monitored with the
debugfs knobs. At the same time, reducing the size of a kernel and
modules is an essential task in the world of embedded software.
Disabling the WWAN and IOSM debugfs interfaces allows us to save 50K
(x86-64 build) of space for module storage. Not much, but already
considerable when you only have 16MB of storage.

So it is hard to just disable whole debugfs. Users need some fine
grained set of options to control which debugfs interface is important
and should be available and which is not.

The new configuration symbol is enabled by default and is hidden under
the EXPERT option. So a regular user would not be bothered by another
one configuration question. While an embedded distro maintainer will be
able to a little more reduce the final image size.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
Acked-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/wwan.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/wwan.h b/include/linux/wwan.h
index 1646aa3e6779..e143c88bf4b0 100644
--- a/include/linux/wwan.h
+++ b/include/linux/wwan.h
@@ -171,6 +171,13 @@ int wwan_register_ops(struct device *parent, const struct wwan_ops *ops,
 
 void wwan_unregister_ops(struct device *parent);
 
+#ifdef CONFIG_WWAN_DEBUGFS
 struct dentry *wwan_get_debugfs_dir(struct device *parent);
+#else
+static inline struct dentry *wwan_get_debugfs_dir(struct device *parent)
+{
+	return ERR_PTR(-ENODEV);
+}
+#endif
 
 #endif /* __WWAN_H */
-- 
cgit v1.2.3


From 469407a3b5ed9390cfacb0363d1cc926a51f6a14 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Thu, 9 Dec 2021 09:29:18 +0800
Subject: erofs: clean up erofs_map_blocks tracepoints

Since the new type of chunk-based files is introduced, there is no
need to leave flatmode tracepoints.

Rename to erofs_map_blocks instead.

Link: https://lore.kernel.org/r/20211209012918.30337-1-hsiangkao@linux.alibaba.com
Reviewed-by: Yue Hu <huyue2@yulong.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 include/trace/events/erofs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h
index 16ae7b666810..57de057bd503 100644
--- a/include/trace/events/erofs.h
+++ b/include/trace/events/erofs.h
@@ -169,7 +169,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_enter,
 		  __entry->flags ? show_map_flags(__entry->flags) : "NULL")
 );
 
-DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_flatmode_enter,
+DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_enter,
 	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
 		 unsigned flags),
 
@@ -221,7 +221,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_exit,
 		  show_mflags(__entry->mflags), __entry->ret)
 );
 
-DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_flatmode_exit,
+DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_exit,
 	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
 		 unsigned flags, int ret),
 
-- 
cgit v1.2.3


From 1197528aaea79ed4909aba695d18fdecc5387a36 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:27:28 +0100
Subject: genirq/msi: Guard sysfs code

No point in building unused code when CONFIG_SYSFS=n.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20211206210223.985907940@linutronix.de
---
 include/linux/msi.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index e616f94c7c58..d43b9469c88b 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -239,9 +239,19 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
 void pci_msi_mask_irq(struct irq_data *data);
 void pci_msi_unmask_irq(struct irq_data *data);
 
+#ifdef CONFIG_SYSFS
 const struct attribute_group **msi_populate_sysfs(struct device *dev);
 void msi_destroy_sysfs(struct device *dev,
 		       const struct attribute_group **msi_irq_groups);
+#else
+static inline const struct attribute_group **msi_populate_sysfs(struct device *dev)
+{
+	return NULL;
+}
+static inline void msi_destroy_sysfs(struct device *dev, const struct attribute_group **msi_irq_groups)
+{
+}
+#endif
 
 /*
  * The arch hooks to setup up msi irqs. Default functions are implemented
-- 
cgit v1.2.3


From 1dd2c6a0817fd08f80dee75d7d3bd99a0c4b828d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:27:29 +0100
Subject: genirq/msi: Remove unused domain callbacks

No users and there is no need to grow them.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211126223824.322987915@linutronix.de
Link: https://lore.kernel.org/r/20211206210224.041777889@linutronix.de
---
 include/linux/msi.h | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index d43b9469c88b..4b962f73f84a 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -304,7 +304,6 @@ struct msi_domain_info;
  * @msi_free:		Domain specific function to free a MSI interrupts
  * @msi_check:		Callback for verification of the domain/info/dev data
  * @msi_prepare:	Prepare the allocation of the interrupts in the domain
- * @msi_finish:		Optional callback to finalize the allocation
  * @set_desc:		Set the msi descriptor for an interrupt
  * @handle_error:	Optional error handler if the allocation fails
  * @domain_alloc_irqs:	Optional function to override the default allocation
@@ -312,12 +311,11 @@ struct msi_domain_info;
  * @domain_free_irqs:	Optional function to override the default free
  *			function.
  *
- * @get_hwirq, @msi_init and @msi_free are callbacks used by
- * msi_create_irq_domain() and related interfaces
+ * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying
+ * irqdomain.
  *
- * @msi_check, @msi_prepare, @msi_finish, @set_desc and @handle_error
- * are callbacks used by msi_domain_alloc_irqs() and related
- * interfaces which are based on msi_desc.
+ * @msi_check, @msi_prepare, @handle_error and @set_desc are callbacks used by
+ * msi_domain_alloc/free_irqs().
  *
  * @domain_alloc_irqs, @domain_free_irqs can be used to override the
  * default allocation/free functions (__msi_domain_alloc/free_irqs). This
@@ -351,7 +349,6 @@ struct msi_domain_ops {
 	int		(*msi_prepare)(struct irq_domain *domain,
 				       struct device *dev, int nvec,
 				       msi_alloc_info_t *arg);
-	void		(*msi_finish)(msi_alloc_info_t *arg, int retval);
 	void		(*set_desc)(msi_alloc_info_t *arg,
 				    struct msi_desc *desc);
 	int		(*handle_error)(struct irq_domain *domain,
-- 
cgit v1.2.3


From 3ba1f050c91d5ce3672dbf3a55dc2451c0b342e2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:27:31 +0100
Subject: genirq/msi: Fixup includes

Remove the kobject.h include from msi.h as it's not required and add a
sysfs.h include to the core code instead.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20211206210224.103502021@linutronix.de
---
 include/linux/msi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 4b962f73f84a..5c627750f269 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -2,7 +2,7 @@
 #ifndef LINUX_MSI_H
 #define LINUX_MSI_H
 
-#include <linux/kobject.h>
+#include <linux/cpumask.h>
 #include <linux/list.h>
 #include <asm/msi.h>
 
-- 
cgit v1.2.3


From 9e8688c5f2990dadcf83728cd00a7e8497fc6aa9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:27:33 +0100
Subject: PCI/MSI: Make pci_msi_domain_write_msg() static

There is no point to have this function public as it is set by the PCI core
anyway when a PCI/MSI irqdomain is created.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>	# PCI
Link: https://lore.kernel.org/r/20211206210224.157070464@linutronix.de
---
 include/linux/msi.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 5c627750f269..d7b143a79cb4 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -455,7 +455,6 @@ void *platform_msi_get_host_data(struct irq_domain *domain);
 #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */
 
 #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
-void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg);
 struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
 					     struct msi_domain_info *info,
 					     struct irq_domain *parent);
-- 
cgit v1.2.3


From ade044a3d0f0389e4f916337c505550acc3fd011 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:27:34 +0100
Subject: PCI/MSI: Remove msi_desc_to_pci_sysdata()

Last user is gone long ago.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/20211206210224.210768199@linutronix.de
---
 include/linux/msi.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index d7b143a79cb4..ac6fec105edc 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -218,13 +218,8 @@ static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc,
 	for_each_msi_entry((desc), &(pdev)->dev)
 
 struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc);
-void *msi_desc_to_pci_sysdata(struct msi_desc *desc);
 void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg);
 #else /* CONFIG_PCI_MSI */
-static inline void *msi_desc_to_pci_sysdata(struct msi_desc *desc)
-{
-	return NULL;
-}
 static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
 {
 }
-- 
cgit v1.2.3


From e58f2259b91c02974c20db7b28d39d810a21249b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:27:39 +0100
Subject: genirq/msi, treewide: Use a named struct for PCI/MSI attributes

The unnamed struct sucks and is in the way of further cleanups. Stick the
PCI related MSI data into a real data structure and cleanup all users.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20211206210224.374863119@linutronix.de
---
 include/linux/msi.h | 84 ++++++++++++++++++++++++++---------------------------
 1 file changed, 41 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index ac6fec105edc..7e5c13f4e41b 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -68,6 +68,42 @@ static inline void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
 typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc,
 				    struct msi_msg *msg);
 
+/**
+ * pci_msi_desc - PCI/MSI specific MSI descriptor data
+ *
+ * @msi_mask:	[PCI MSI]   MSI cached mask bits
+ * @msix_ctrl:	[PCI MSI-X] MSI-X cached per vector control bits
+ * @is_msix:	[PCI MSI/X] True if MSI-X
+ * @multiple:	[PCI MSI/X] log2 num of messages allocated
+ * @multi_cap:	[PCI MSI/X] log2 num of messages supported
+ * @can_mask:	[PCI MSI/X] Masking supported?
+ * @is_64:	[PCI MSI/X] Address size: 0=32bit 1=64bit
+ * @entry_nr:	[PCI MSI/X] Entry which is described by this descriptor
+ * @default_irq:[PCI MSI/X] The default pre-assigned non-MSI irq
+ * @mask_pos:	[PCI MSI]   Mask register position
+ * @mask_base:	[PCI MSI-X] Mask register base address
+ */
+struct pci_msi_desc {
+	union {
+		u32 msi_mask;
+		u32 msix_ctrl;
+	};
+	struct {
+		u8	is_msix		: 1;
+		u8	multiple	: 3;
+		u8	multi_cap	: 3;
+		u8	can_mask	: 1;
+		u8	is_64		: 1;
+		u8	is_virtual	: 1;
+		u16	entry_nr;
+		unsigned default_irq;
+	} msi_attrib;
+	union {
+		u8	mask_pos;
+		void __iomem *mask_base;
+	};
+};
+
 /**
  * platform_msi_desc - Platform device specific msi descriptor data
  * @msi_priv_data:	Pointer to platform private data
@@ -107,17 +143,7 @@ struct ti_sci_inta_msi_desc {
  *			address or data changes
  * @write_msi_msg_data:	Data parameter for the callback.
  *
- * @msi_mask:	[PCI MSI]   MSI cached mask bits
- * @msix_ctrl:	[PCI MSI-X] MSI-X cached per vector control bits
- * @is_msix:	[PCI MSI/X] True if MSI-X
- * @multiple:	[PCI MSI/X] log2 num of messages allocated
- * @multi_cap:	[PCI MSI/X] log2 num of messages supported
- * @maskbit:	[PCI MSI/X] Mask-Pending bit supported?
- * @is_64:	[PCI MSI/X] Address size: 0=32bit 1=64bit
- * @entry_nr:	[PCI MSI/X] Entry which is described by this descriptor
- * @default_irq:[PCI MSI/X] The default pre-assigned non-MSI irq
- * @mask_pos:	[PCI MSI]   Mask register position
- * @mask_base:	[PCI MSI-X] Mask register base address
+ * @pci:	[PCI]	    PCI speficic msi descriptor data
  * @platform:	[platform]  Platform device specific msi descriptor data
  * @fsl_mc:	[fsl-mc]    FSL MC device specific msi descriptor data
  * @inta:	[INTA]	    TISCI based INTA specific msi descriptor data
@@ -138,38 +164,10 @@ struct msi_desc {
 	void *write_msi_msg_data;
 
 	union {
-		/* PCI MSI/X specific data */
-		struct {
-			union {
-				u32 msi_mask;
-				u32 msix_ctrl;
-			};
-			struct {
-				u8	is_msix		: 1;
-				u8	multiple	: 3;
-				u8	multi_cap	: 3;
-				u8	can_mask	: 1;
-				u8	is_64		: 1;
-				u8	is_virtual	: 1;
-				u16	entry_nr;
-				unsigned default_irq;
-			} msi_attrib;
-			union {
-				u8	mask_pos;
-				void __iomem *mask_base;
-			};
-		};
-
-		/*
-		 * Non PCI variants add their data structure here. New
-		 * entries need to use a named structure. We want
-		 * proper name spaces for this. The PCI part is
-		 * anonymous for now as it would require an immediate
-		 * tree wide cleanup.
-		 */
-		struct platform_msi_desc platform;
-		struct fsl_mc_msi_desc fsl_mc;
-		struct ti_sci_inta_msi_desc inta;
+		struct pci_msi_desc		pci;
+		struct platform_msi_desc	platform;
+		struct fsl_mc_msi_desc		fsl_mc;
+		struct ti_sci_inta_msi_desc	inta;
 	};
 };
 
-- 
cgit v1.2.3


From ae72f3156729541581f526b85883ca53a20df2fa Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:27:42 +0100
Subject: PCI/MSI: Make arch_restore_msi_irqs() less horrible.

Make arch_restore_msi_irqs() return a boolean which indicates whether the
core code should restore the MSI message or not. Get rid of the indirection
in x86.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>	# PCI
Link: https://lore.kernel.org/r/20211206210224.485668098@linutronix.de
---
 include/linux/msi.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 7e5c13f4e41b..673899fc24f6 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -273,11 +273,10 @@ static inline void arch_teardown_msi_irqs(struct pci_dev *dev)
 #endif
 
 /*
- * The restore hooks are still available as they are useful even
- * for fully irq domain based setups. Courtesy to XEN/X86.
+ * The restore hook is still available even for fully irq domain based
+ * setups. Courtesy to XEN/X86.
  */
-void arch_restore_msi_irqs(struct pci_dev *dev);
-void default_restore_msi_irqs(struct pci_dev *dev);
+bool arch_restore_msi_irqs(struct pci_dev *dev);
 
 #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
 
-- 
cgit v1.2.3


From aa423ac4221abdfb8588751e7838ca5f42f56db3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:27:52 +0100
Subject: PCI/MSI: Split out irqdomain code

Move the irqdomain specific code into its own file.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/20211206210224.817754783@linutronix.de
---
 include/linux/msi.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 673899fc24f6..7ff7cf23b78d 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -259,17 +259,6 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc);
 void arch_teardown_msi_irq(unsigned int irq);
 int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
 void arch_teardown_msi_irqs(struct pci_dev *dev);
-#else
-static inline int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-	WARN_ON_ONCE(1);
-	return -ENODEV;
-}
-
-static inline void arch_teardown_msi_irqs(struct pci_dev *dev)
-{
-	WARN_ON_ONCE(1);
-}
 #endif
 
 /*
-- 
cgit v1.2.3


From 85aa607e79f8343f1ea028b29bdf8b6bc99c729a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:27:54 +0100
Subject: PCI/MSI: Sanitize MSI-X table map handling

Unmapping the MSI-X base mapping in the loops which allocate/free MSI
descriptors is daft and in the way of allowing runtime expansion of MSI-X
descriptors.

Store the mapping in struct pci_dev and free it after freeing the MSI-X
descriptors.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/20211206210224.871651518@linutronix.de
---
 include/linux/pci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 18a75c8e615c..8cb103677f5a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -473,6 +473,7 @@ struct pci_dev {
 	u8		ptm_granularity;
 #endif
 #ifdef CONFIG_PCI_MSI
+	void __iomem	*msix_base;
 	const struct attribute_group **msi_irq_groups;
 #endif
 	struct pci_vpd	vpd;
-- 
cgit v1.2.3


From cd119b09a87d8beb50356d8c5c6aa42d89c44eb7 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:27:56 +0100
Subject: PCI/MSI: Move msi_lock to struct pci_dev

It's only required for PCI/MSI. So no point in having it in every struct
device.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/20211206210224.925241961@linutronix.de
---
 include/linux/device.h | 2 --
 include/linux/pci.h    | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index e270cb740b9e..2a22875238a6 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -407,7 +407,6 @@ struct dev_links_info {
  * @em_pd:	device's energy model performance domain
  * @pins:	For device pin management.
  *		See Documentation/driver-api/pin-control.rst for details.
- * @msi_lock:	Lock to protect MSI mask cache and mask register
  * @msi_list:	Hosts MSI descriptors
  * @msi_domain: The generic MSI domain this device is using.
  * @numa_node:	NUMA node this device is close to.
@@ -508,7 +507,6 @@ struct device {
 	struct dev_pin_info	*pins;
 #endif
 #ifdef CONFIG_GENERIC_MSI_IRQ
-	raw_spinlock_t		msi_lock;
 	struct list_head	msi_list;
 #endif
 #ifdef CONFIG_DMA_OPS
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8cb103677f5a..5cc46baef519 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -474,6 +474,7 @@ struct pci_dev {
 #endif
 #ifdef CONFIG_PCI_MSI
 	void __iomem	*msix_base;
+	raw_spinlock_t	msi_lock;
 	const struct attribute_group **msi_irq_groups;
 #endif
 	struct pci_vpd	vpd;
-- 
cgit v1.2.3


From 57ce3a3c99b21e9c4f951ef01e0a3603c987c259 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:27:57 +0100
Subject: PCI/MSI: Make pci_msi_domain_check_cap() static

No users outside of that file.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/20211206210224.980989243@linutronix.de
---
 include/linux/msi.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 7ff7cf23b78d..5248678e05d1 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -439,8 +439,6 @@ void *platform_msi_get_host_data(struct irq_domain *domain);
 struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
 					     struct msi_domain_info *info,
 					     struct irq_domain *parent);
-int pci_msi_domain_check_cap(struct irq_domain *domain,
-			     struct msi_domain_info *info, struct device *dev);
 u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev);
 struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev);
 bool pci_dev_has_special_msi_domain(struct pci_dev *pdev);
-- 
cgit v1.2.3


From 890337624e1fa2da079fc1c036a62d178c985280 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:27:59 +0100
Subject: genirq/msi: Handle PCI/MSI allocation fail in core code

Get rid of yet another irqdomain callback and let the core code return the
already available information of how many descriptors could be allocated.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>	# PCI
Link: https://lore.kernel.org/r/20211206210225.046615302@linutronix.de
---
 include/linux/msi.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 5248678e05d1..ba4a39c430b5 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -286,7 +286,6 @@ struct msi_domain_info;
  * @msi_check:		Callback for verification of the domain/info/dev data
  * @msi_prepare:	Prepare the allocation of the interrupts in the domain
  * @set_desc:		Set the msi descriptor for an interrupt
- * @handle_error:	Optional error handler if the allocation fails
  * @domain_alloc_irqs:	Optional function to override the default allocation
  *			function.
  * @domain_free_irqs:	Optional function to override the default free
@@ -295,7 +294,7 @@ struct msi_domain_info;
  * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying
  * irqdomain.
  *
- * @msi_check, @msi_prepare, @handle_error and @set_desc are callbacks used by
+ * @msi_check, @msi_prepare and @set_desc are callbacks used by
  * msi_domain_alloc/free_irqs().
  *
  * @domain_alloc_irqs, @domain_free_irqs can be used to override the
@@ -332,8 +331,6 @@ struct msi_domain_ops {
 				       msi_alloc_info_t *arg);
 	void		(*set_desc)(msi_alloc_info_t *arg,
 				    struct msi_desc *desc);
-	int		(*handle_error)(struct irq_domain *domain,
-					struct msi_desc *desc, int error);
 	int		(*domain_alloc_irqs)(struct irq_domain *domain,
 					     struct device *dev, int nvec);
 	void		(*domain_free_irqs)(struct irq_domain *domain,
-- 
cgit v1.2.3


From ae710a458f0af6ba2b991ebdddffc66e8dbd765a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 10 Nov 2021 12:24:23 +0200
Subject: drm: Replace kernel.h with the necessary inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20211110102423.54282-1-andriy.shevchenko@linux.intel.com
---
 include/drm/drm_gem_ttm_helper.h  | 2 +-
 include/drm/drm_gem_vram_helper.h | 2 +-
 include/drm/drm_mm.h              | 4 +++-
 3 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_gem_ttm_helper.h b/include/drm/drm_gem_ttm_helper.h
index c1aa02bd4c89..78040f6cc6f3 100644
--- a/include/drm/drm_gem_ttm_helper.h
+++ b/include/drm/drm_gem_ttm_helper.h
@@ -3,7 +3,7 @@
 #ifndef DRM_GEM_TTM_HELPER_H
 #define DRM_GEM_TTM_HELPER_H
 
-#include <linux/kernel.h>
+#include <linux/container_of.h>
 
 #include <drm/drm_device.h>
 #include <drm/drm_gem.h>
diff --git a/include/drm/drm_gem_vram_helper.h b/include/drm/drm_gem_vram_helper.h
index d3cf06c9af65..b4ce27a72773 100644
--- a/include/drm/drm_gem_vram_helper.h
+++ b/include/drm/drm_gem_vram_helper.h
@@ -11,8 +11,8 @@
 #include <drm/ttm/ttm_bo_api.h>
 #include <drm/ttm/ttm_bo_driver.h>
 
+#include <linux/container_of.h>
 #include <linux/dma-buf-map.h>
-#include <linux/kernel.h> /* for container_of() */
 
 struct drm_mode_create_dumb;
 struct drm_plane;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 9b4292f229c6..ac33ba1b18bc 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -39,13 +39,15 @@
  */
 #include <linux/bug.h>
 #include <linux/rbtree.h>
-#include <linux/kernel.h>
+#include <linux/limits.h>
 #include <linux/mm_types.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #ifdef CONFIG_DRM_DEBUG_MM
 #include <linux/stackdepot.h>
 #endif
+#include <linux/types.h>
+
 #include <drm/drm_print.h>
 
 #ifdef CONFIG_DRM_DEBUG_MM
-- 
cgit v1.2.3


From 50468e4313355b161cac8a5155a45832995b7f25 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Tue, 16 Nov 2021 18:21:16 +0200
Subject: x86/sgx: Add an attribute for the amount of SGX memory in a NUMA node

== Problem ==

The amount of SGX memory on a system is determined by the BIOS and it
varies wildly between systems.  It can be as small as dozens of MB's
and as large as many GB's on servers.  Just like how applications need
to know how much regular RAM is available, enclave builders need to
know how much SGX memory an enclave can consume.

== Solution ==

Introduce a new sysfs file:

	/sys/devices/system/node/nodeX/x86/sgx_total_bytes

to enumerate the amount of SGX memory available in each NUMA node.
This serves the same function for SGX as /proc/meminfo or
/sys/devices/system/node/nodeX/meminfo does for normal RAM.

'sgx_total_bytes' is needed today to help drive the SGX selftests.
SGX-specific swap code is exercised by creating overcommitted enclaves
which are larger than the physical SGX memory on the system.  They
currently use a CPUID-based approach which can diverge from the actual
amount of SGX memory available.  'sgx_total_bytes' ensures that the
selftests can work efficiently and do not attempt stupid things like
creating a 100,000 MB enclave on a system with 128 MB of SGX memory.

== Implementation Details ==

Introduce CONFIG_HAVE_ARCH_NODE_DEV_GROUP opt-in flag to expose an
arch specific attribute group, and add an attribute for the amount of
SGX memory in bytes to each NUMA node:

== ABI Design Discussion ==

As opposed to the per-node ABI, a single, global ABI was considered.
However, this would prevent enclaves from being able to size
themselves so that they fit on a single NUMA node.  Essentially, a
single value would rule out NUMA optimizations for enclaves.

Create a new "x86/" directory inside each "nodeX/" sysfs directory.
'sgx_total_bytes' is expected to be the first of at least a few
sgx-specific files to be placed in the new directory.  Just scanning
/proc/meminfo, these are the no-brainers that we have for RAM, but we
need for SGX:

	MemTotal:       xxxx kB // sgx_total_bytes (implemented here)
	MemFree:        yyyy kB // sgx_free_bytes
	SwapTotal:      zzzz kB // sgx_swapped_bytes

So, at *least* three.  I think we will eventually end up needing
something more along the lines of a dozen.  A new directory (as
opposed to being in the nodeX/ "root") directory avoids cluttering the
root with several "sgx_*" files.

Place the new file in a new "nodeX/x86/" directory because SGX is
highly x86-specific.  It is very unlikely that any other architecture
(or even non-Intel x86 vendor) will ever implement SGX.  Using "sgx/"
as opposed to "x86/" was also considered.  But, there is a real chance
this can get used for other arch-specific purposes.

[ dhansen: rewrite changelog ]

Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20211116162116.93081-2-jarkko@kernel.org
---
 include/linux/numa.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/numa.h b/include/linux/numa.h
index cb44cfe2b725..59df211d051f 100644
--- a/include/linux/numa.h
+++ b/include/linux/numa.h
@@ -58,4 +58,8 @@ static inline int phys_to_target_node(u64 start)
 }
 #endif
 
+#ifdef CONFIG_HAVE_ARCH_NODE_DEV_GROUP
+extern const struct attribute_group arch_node_dev_group;
+#endif
+
 #endif /* _LINUX_NUMA_H */
-- 
cgit v1.2.3


From 67b967ddd93d0ed57d392a00f6f90060f0910c0e Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 18 Nov 2021 12:46:59 +0100
Subject: mtd: Introduce an expert mode for forensics and debugging purposes

When developping NAND controller drivers or when debugging filesystem
corruptions, it is quite common to need hacking locally into the
MTD/NAND core in order to get access to the content of the bad
blocks. Instead of having multiple implementations out there let's
provide a simple yet effective specific MTD-wide debugfs entry to fully
disable these checks on purpose.

A warning is added to inform the user when this mode gets enabled.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20211118114659.1282855-1-miquel.raynal@bootlin.com
---
 include/linux/mtd/mtd.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index f5e7dfc2e4e9..1ffa933121f6 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -711,4 +711,7 @@ static inline int mtd_is_bitflip_or_eccerr(int err) {
 
 unsigned mtd_mmap_capabilities(struct mtd_info *mtd);
 
+extern char *mtd_expert_analysis_warning;
+extern bool mtd_expert_analysis_mode;
+
 #endif /* __MTD_MTD_H__ */
-- 
cgit v1.2.3


From cab2d3fd6866e089b5c50db09dece131f85bfebd Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Thu, 9 Dec 2021 18:46:33 +0530
Subject: bus: mhi: core: Add support for forced PM resume

For whatever reason, some devices like QCA6390, WCN6855 using ath11k
are not in M3 state during PM resume, but still functional. The
mhi_pm_resume should then not fail in those cases, and let the higher
level device specific stack continue resuming process.

Add an API mhi_pm_resume_force(), to force resuming irrespective of the
current MHI state. This fixes a regression with non functional ath11k WiFi
after suspend/resume cycle on some machines.

Bug report: https://bugzilla.kernel.org/show_bug.cgi?id=214179

Link: https://lore.kernel.org/regressions/871r5p0x2u.fsf@codeaurora.org/
Fixes: 020d3b26c07a ("bus: mhi: Early MHI resume failure in non M3 state")
Cc: stable@vger.kernel.org #5.13
Reported-by: Kalle Valo <kvalo@codeaurora.org>
Reported-by: Pengyu Ma <mapengyu@gmail.com>
Tested-by: Kalle Valo <kvalo@kernel.org>
Acked-by: Kalle Valo <kvalo@kernel.org>
Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
[mani: Switched to API, added bug report, reported-by tags and CCed stable]
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Link: https://lore.kernel.org/r/20211209131633.4168-1-manivannan.sadhasivam@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/mhi.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/mhi.h b/include/linux/mhi.h
index 723985879035..a5cc4cdf9cc8 100644
--- a/include/linux/mhi.h
+++ b/include/linux/mhi.h
@@ -663,6 +663,19 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl);
  */
 int mhi_pm_resume(struct mhi_controller *mhi_cntrl);
 
+/**
+ * mhi_pm_resume_force - Force resume MHI from suspended state
+ * @mhi_cntrl: MHI controller
+ *
+ * Resume the device irrespective of its MHI state. As per the MHI spec, devices
+ * has to be in M3 state during resume. But some devices seem to be in a
+ * different MHI state other than M3 but they continue working fine if allowed.
+ * This API is intented to be used for such devices.
+ *
+ * Return: 0 if the resume succeeds, a negative error code otherwise
+ */
+int mhi_pm_resume_force(struct mhi_controller *mhi_cntrl);
+
 /**
  * mhi_download_rddm_image - Download ramdump image from device for
  *                           debugging purpose.
-- 
cgit v1.2.3


From dc70ec217cec504e6f8fee8fd91bf5c118af05f2 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sun, 21 Nov 2021 12:54:40 +0000
Subject: KVM: Introduce CONFIG_HAVE_KVM_DIRTY_RING

I'd like to make the build include dirty_ring.c based on whether the
arch wants it or not. That's a whole lot simpler if there's a config
symbol instead of doing it implicitly on KVM_DIRTY_LOG_PAGE_OFFSET
being set to something non-zero.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20211121125451.9489-2-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_dirty_ring.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
index 120e5e90fa1d..4da8d4a4140b 100644
--- a/include/linux/kvm_dirty_ring.h
+++ b/include/linux/kvm_dirty_ring.h
@@ -27,9 +27,9 @@ struct kvm_dirty_ring {
 	int index;
 };
 
-#if (KVM_DIRTY_LOG_PAGE_OFFSET == 0)
+#ifndef CONFIG_HAVE_KVM_DIRTY_RING
 /*
- * If KVM_DIRTY_LOG_PAGE_OFFSET not defined, kvm_dirty_ring.o should
+ * If CONFIG_HAVE_HVM_DIRTY_RING not defined, kvm_dirty_ring.o should
  * not be included as well, so define these nop functions for the arch.
  */
 static inline u32 kvm_dirty_ring_get_rsvd_entries(void)
@@ -74,7 +74,7 @@ static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
 	return true;
 }
 
-#else /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */
+#else /* CONFIG_HAVE_KVM_DIRTY_RING */
 
 u32 kvm_dirty_ring_get_rsvd_entries(void);
 int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size);
@@ -98,6 +98,6 @@ struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset);
 void kvm_dirty_ring_free(struct kvm_dirty_ring *ring);
 bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring);
 
-#endif /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */
+#endif /* CONFIG_HAVE_KVM_DIRTY_RING */
 
 #endif	/* KVM_DIRTY_RING_H */
-- 
cgit v1.2.3


From abaad3d95b5117a17886d37cf0228712801cd259 Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Mon, 6 Dec 2021 12:26:17 -0500
Subject: drm/vmwgfx: Allow checking for gl43 contexts

To make sure we're running on top of hardware that can support
GL4.3 we need to add a way of querying for those capabilities.
DRM_VMW_PARAM_GL43 allows userspace to check for presence of
GL4.3 capable contexts.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Charmaine Lee <charmainel@vmware.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211206172620.3139754-10-zack@kde.org
---
 include/uapi/drm/vmwgfx_drm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h
index 9078775feb51..8277644c1144 100644
--- a/include/uapi/drm/vmwgfx_drm.h
+++ b/include/uapi/drm/vmwgfx_drm.h
@@ -110,6 +110,7 @@ extern "C" {
 #define DRM_VMW_PARAM_HW_CAPS2         13
 #define DRM_VMW_PARAM_SM4_1            14
 #define DRM_VMW_PARAM_SM5              15
+#define DRM_VMW_PARAM_GL43             16
 
 /**
  * enum drm_vmw_handle_type - handle type for ref ioctls
-- 
cgit v1.2.3


From 42288cb44c4b5fff7653bc392b583a2b8bd6a8c0 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Wed, 8 Dec 2021 17:04:51 -0800
Subject: wait: add wake_up_pollfree()

Several ->poll() implementations are special in that they use a
waitqueue whose lifetime is the current task, rather than the struct
file as is normally the case.  This is okay for blocking polls, since a
blocking poll occurs within one task; however, non-blocking polls
require another solution.  This solution is for the queue to be cleared
before it is freed, using 'wake_up_poll(wq, EPOLLHUP | POLLFREE);'.

However, that has a bug: wake_up_poll() calls __wake_up() with
nr_exclusive=1.  Therefore, if there are multiple "exclusive" waiters,
and the wakeup function for the first one returns a positive value, only
that one will be called.  That's *not* what's needed for POLLFREE;
POLLFREE is special in that it really needs to wake up everyone.

Considering the three non-blocking poll systems:

- io_uring poll doesn't handle POLLFREE at all, so it is broken anyway.

- aio poll is unaffected, since it doesn't support exclusive waits.
  However, that's fragile, as someone could add this feature later.

- epoll doesn't appear to be broken by this, since its wakeup function
  returns 0 when it sees POLLFREE.  But this is fragile.

Although there is a workaround (see epoll), it's better to define a
function which always sends POLLFREE to all waiters.  Add such a
function.  Also make it verify that the queue really becomes empty after
all waiters have been woken up.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20211209010455.42744-2-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 include/linux/wait.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 2d0df57c9902..851e07da2583 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -217,6 +217,7 @@ void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void
 void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
 void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr);
 void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode);
+void __wake_up_pollfree(struct wait_queue_head *wq_head);
 
 #define wake_up(x)			__wake_up(x, TASK_NORMAL, 1, NULL)
 #define wake_up_nr(x, nr)		__wake_up(x, TASK_NORMAL, nr, NULL)
@@ -245,6 +246,31 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode);
 #define wake_up_interruptible_sync_poll_locked(x, m)				\
 	__wake_up_locked_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m))
 
+/**
+ * wake_up_pollfree - signal that a polled waitqueue is going away
+ * @wq_head: the wait queue head
+ *
+ * In the very rare cases where a ->poll() implementation uses a waitqueue whose
+ * lifetime is tied to a task rather than to the 'struct file' being polled,
+ * this function must be called before the waitqueue is freed so that
+ * non-blocking polls (e.g. epoll) are notified that the queue is going away.
+ *
+ * The caller must also RCU-delay the freeing of the wait_queue_head, e.g. via
+ * an explicit synchronize_rcu() or call_rcu(), or via SLAB_TYPESAFE_BY_RCU.
+ */
+static inline void wake_up_pollfree(struct wait_queue_head *wq_head)
+{
+	/*
+	 * For performance reasons, we don't always take the queue lock here.
+	 * Therefore, we might race with someone removing the last entry from
+	 * the queue, and proceed while they still hold the queue lock.
+	 * However, rcu_read_lock() is required to be held in such cases, so we
+	 * can safely proceed with an RCU-delayed free.
+	 */
+	if (waitqueue_active(wq_head))
+		__wake_up_pollfree(wq_head);
+}
+
 #define ___wait_cond_timeout(condition)						\
 ({										\
 	bool __cond = (condition);						\
-- 
cgit v1.2.3


From 50252e4b5e989ce64555c7aef7516bdefc2fea72 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Wed, 8 Dec 2021 17:04:55 -0800
Subject: aio: fix use-after-free due to missing POLLFREE handling

signalfd_poll() and binder_poll() are special in that they use a
waitqueue whose lifetime is the current task, rather than the struct
file as is normally the case.  This is okay for blocking polls, since a
blocking poll occurs within one task; however, non-blocking polls
require another solution.  This solution is for the queue to be cleared
before it is freed, by sending a POLLFREE notification to all waiters.

Unfortunately, only eventpoll handles POLLFREE.  A second type of
non-blocking poll, aio poll, was added in kernel v4.18, and it doesn't
handle POLLFREE.  This allows a use-after-free to occur if a signalfd or
binder fd is polled with aio poll, and the waitqueue gets freed.

Fix this by making aio poll handle POLLFREE.

A patch by Ramji Jiyani <ramjiyani@google.com>
(https://lore.kernel.org/r/20211027011834.2497484-1-ramjiyani@google.com)
tried to do this by making aio_poll_wake() always complete the request
inline if POLLFREE is seen.  However, that solution had two bugs.
First, it introduced a deadlock, as it unconditionally locked the aio
context while holding the waitqueue lock, which inverts the normal
locking order.  Second, it didn't consider that POLLFREE notifications
are missed while the request has been temporarily de-queued.

The second problem was solved by my previous patch.  This patch then
properly fixes the use-after-free by handling POLLFREE in a
deadlock-free way.  It does this by taking advantage of the fact that
freeing of the waitqueue is RCU-delayed, similar to what eventpoll does.

Fixes: 2c14fa838cbe ("aio: implement IOCB_CMD_POLL")
Cc: <stable@vger.kernel.org> # v4.18+
Link: https://lore.kernel.org/r/20211209010455.42744-6-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 include/uapi/asm-generic/poll.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/asm-generic/poll.h b/include/uapi/asm-generic/poll.h
index 41b509f410bf..f9c520ce4bf4 100644
--- a/include/uapi/asm-generic/poll.h
+++ b/include/uapi/asm-generic/poll.h
@@ -29,7 +29,7 @@
 #define POLLRDHUP       0x2000
 #endif
 
-#define POLLFREE	(__force __poll_t)0x4000	/* currently only for epoll */
+#define POLLFREE	(__force __poll_t)0x4000
 
 #define POLL_BUSY_LOOP	(__force __poll_t)0x8000
 
-- 
cgit v1.2.3


From 6abfaaf124a81b7d2ab132cc2c9885baa14171e5 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Wed, 27 Oct 2021 16:18:45 +0200
Subject: fs_parse: allow parameter value to be empty

Allow parameter value to be empty by specifying fs_param_can_be_empty
flag.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Link: https://lore.kernel.org/r/20211027141857.33657-2-lczerner@redhat.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/fs_parser.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
index aab0ffc6bac6..f103c91139d4 100644
--- a/include/linux/fs_parser.h
+++ b/include/linux/fs_parser.h
@@ -42,7 +42,7 @@ struct fs_parameter_spec {
 	u8			opt;	/* Option number (returned by fs_parse()) */
 	unsigned short		flags;
 #define fs_param_neg_with_no	0x0002	/* "noxxx" is negative param */
-#define fs_param_neg_with_empty	0x0004	/* "xxx=" is negative param */
+#define fs_param_can_be_empty	0x0004	/* "xxx=" is allowed */
 #define fs_param_deprecated	0x0008	/* The param is deprecated */
 	const void		*data;
 };
-- 
cgit v1.2.3


From 3e5b1feccea7db576353ffc302f78d522e4116e6 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Thu, 9 Dec 2021 13:11:32 +0000
Subject: net: phylink: add legacy_pre_march2020 indicator

Add a boolean to phylink_config to indicate whether a driver has not
been updated for the changes in commit 7cceb599d15d ("net: phylink:
avoid mac_config calls"), and thus are reliant on the old behaviour.

We were currently keying the phylink behaviour on the presence of a
PCS, but this is sub-optimal for modern drivers that may not have a
PCS.

This commit merely introduces the new flag, but does not add any use,
since we need all legacy drivers to set this flag before it can be
used. Once these legacy drivers have been updated, we can remove this
flag.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phylink.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 01224235df0f..d005b8e36048 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -84,6 +84,8 @@ enum phylink_op_type {
  * struct phylink_config - PHYLINK configuration structure
  * @dev: a pointer to a struct device associated with the MAC
  * @type: operation type of PHYLINK instance
+ * @legacy_pre_march2020: driver has not been updated for March 2020 updates
+ *	(See commit 7cceb599d15d ("net: phylink: avoid mac_config calls")
  * @pcs_poll: MAC PCS cannot provide link change interrupt
  * @poll_fixed_state: if true, starts link_poll,
  *		      if MAC link is at %MLO_AN_FIXED mode.
@@ -97,6 +99,7 @@ enum phylink_op_type {
 struct phylink_config {
 	struct device *dev;
 	enum phylink_op_type type;
+	bool legacy_pre_march2020;
 	bool pcs_poll;
 	bool poll_fixed_state;
 	bool ovr_an_inband;
-- 
cgit v1.2.3


From 001f4261fe4d5ae710cf1f445b6cae6d9d3ae26e Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Thu, 9 Dec 2021 13:11:48 +0000
Subject: net: phylink: use legacy_pre_march2020

Use the legacy flag to indicate whether we should operate in legacy
mode. This allows us to stop using the presence of a PCS as an
indicator to the age of the phylink user, and make PCS presence
optional.

Legacy mode involves:
1) calling mac_config() whenever the link comes up
2) calling mac_config() whenever the inband advertisement changes,
   possibly followed by a call to mac_an_restart()
3) making use of mac_an_restart()
4) making use of mac_pcs_get_state()

All the above functionality was moved to a seperate "PCS" block of
operations in March 2020.

Update the documents to indicate that the differences that this flag
makes.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phylink.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index d005b8e36048..a2f266cc3442 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -190,6 +190,10 @@ void validate(struct phylink_config *config, unsigned long *supported,
  * negotiation completion state in @state->an_complete, and link up state
  * in @state->link. If possible, @state->lp_advertising should also be
  * populated.
+ *
+ * Note: This is a legacy method. This function will not be called unless
+ * legacy_pre_march2020 is set in &struct phylink_config and there is no
+ * PCS attached.
  */
 void mac_pcs_get_state(struct phylink_config *config,
 		       struct phylink_link_state *state);
@@ -230,6 +234,15 @@ int mac_prepare(struct phylink_config *config, unsigned int mode,
  * guaranteed to be correct, and so any mac_config() implementation must
  * never reference these fields.
  *
+ * Note: For legacy March 2020 drivers (drivers with legacy_pre_march2020 set
+ * in their &phylnk_config and which don't have a PCS), this function will be
+ * called on each link up event, and to also change the in-band advert. For
+ * non-legacy drivers, it will only be called to reconfigure the MAC for a
+ * "major" change in e.g. interface mode. It will not be called for changes
+ * in speed, duplex or pause modes or to change the in-band advertisement.
+ * In any case, it is strongly preferred that speed, duplex and pause settings
+ * are handled in the mac_link_up() method and not in this method.
+ *
  * (this requires a rewrite - please refer to mac_link_up() for situations
  *  where the PCS and MAC are not tightly integrated.)
  *
@@ -314,6 +327,10 @@ int mac_finish(struct phylink_config *config, unsigned int mode,
 /**
  * mac_an_restart() - restart 802.3z BaseX autonegotiation
  * @config: a pointer to a &struct phylink_config.
+ *
+ * Note: This is a legacy method. This function will not be called unless
+ * legacy_pre_march2020 is set in &struct phylink_config and there is no
+ * PCS attached.
  */
 void mac_an_restart(struct phylink_config *config);
 
-- 
cgit v1.2.3


From 1a2fb220edca98d18f90e3ef5bd6853a6b22b1b8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 6 Dec 2021 22:27:58 -0800
Subject: skbuff: Extract list pointers to silence compiler warnings

Under both -Warray-bounds and the object_size sanitizer, the compiler is
upset about accessing prev/next of sk_buff when the object it thinks it
is coming from is sk_buff_head. The warning is a false positive due to
the compiler taking a conservative approach, opting to warn at casting
time rather than access time.

However, in support of enabling -Warray-bounds globally (which has
found many real bugs), arrange things for sk_buff so that the compiler
can unambiguously see that there is no intention to access anything
except prev/next.  Introduce and cast to a separate struct sk_buff_list,
which contains _only_ the first two fields, silencing the warnings:

In file included from ./include/net/net_namespace.h:39,
                 from ./include/linux/netdevice.h:37,
                 from net/core/netpoll.c:17:
net/core/netpoll.c: In function 'refill_skbs':
./include/linux/skbuff.h:2086:9: warning: array subscript 'struct sk_buff[0]' is partly outside array bounds of 'struct sk_buff_head[1]' [-Warray-bounds]
 2086 |         __skb_insert(newsk, next->prev, next, list);
      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
net/core/netpoll.c:49:28: note: while referencing 'skb_pool'
   49 | static struct sk_buff_head skb_pool;
      |                            ^~~~~~~~

This change results in no executable instruction differences.

Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20211207062758.2324338-1-keescook@chromium.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index dd262bd8ddbe..6535294f6a48 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -292,9 +292,11 @@ struct tc_skb_ext {
 #endif
 
 struct sk_buff_head {
-	/* These two members must be first. */
-	struct sk_buff	*next;
-	struct sk_buff	*prev;
+	/* These two members must be first to match sk_buff. */
+	struct_group_tagged(sk_buff_list, list,
+		struct sk_buff	*next;
+		struct sk_buff	*prev;
+	);
 
 	__u32		qlen;
 	spinlock_t	lock;
@@ -730,7 +732,7 @@ typedef unsigned char *sk_buff_data_t;
 struct sk_buff {
 	union {
 		struct {
-			/* These two members must be first. */
+			/* These two members must be first to match sk_buff_head. */
 			struct sk_buff		*next;
 			struct sk_buff		*prev;
 
@@ -1976,8 +1978,8 @@ static inline void __skb_insert(struct sk_buff *newsk,
 	 */
 	WRITE_ONCE(newsk->next, next);
 	WRITE_ONCE(newsk->prev, prev);
-	WRITE_ONCE(next->prev, newsk);
-	WRITE_ONCE(prev->next, newsk);
+	WRITE_ONCE(((struct sk_buff_list *)next)->prev, newsk);
+	WRITE_ONCE(((struct sk_buff_list *)prev)->next, newsk);
 	WRITE_ONCE(list->qlen, list->qlen + 1);
 }
 
@@ -2073,7 +2075,7 @@ static inline void __skb_queue_after(struct sk_buff_head *list,
 				     struct sk_buff *prev,
 				     struct sk_buff *newsk)
 {
-	__skb_insert(newsk, prev, prev->next, list);
+	__skb_insert(newsk, prev, ((struct sk_buff_list *)prev)->next, list);
 }
 
 void skb_append(struct sk_buff *old, struct sk_buff *newsk,
@@ -2083,7 +2085,7 @@ static inline void __skb_queue_before(struct sk_buff_head *list,
 				      struct sk_buff *next,
 				      struct sk_buff *newsk)
 {
-	__skb_insert(newsk, next->prev, next, list);
+	__skb_insert(newsk, ((struct sk_buff_list *)next)->prev, next, list);
 }
 
 /**
-- 
cgit v1.2.3


From a4f1192cb53758a7210ed5a9ee695aeba22f75fb Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 9 Dec 2021 14:30:33 +0200
Subject: percpu_ref: Replace kernel.h with the necessary inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
---
 include/linux/percpu-refcount.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index b31d3f3312ce..d73a1c08c3e3 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -51,9 +51,9 @@
 #define _LINUX_PERCPU_REFCOUNT_H
 
 #include <linux/atomic.h>
-#include <linux/kernel.h>
 #include <linux/percpu.h>
 #include <linux/rcupdate.h>
+#include <linux/types.h>
 #include <linux/gfp.h>
 
 struct percpu_ref;
-- 
cgit v1.2.3


From 9756f64c8f2d19c0029a5827bda8ac275302ec22 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Tue, 30 Nov 2021 12:44:11 +0100
Subject: kcsan: Avoid checking scoped accesses from nested contexts

Avoid checking scoped accesses from nested contexts (such as nested
interrupts or in scheduler code) which share the same kcsan_ctx.

This is to avoid detecting false positive races of accesses in the same
thread with currently scoped accesses: consider setting up a watchpoint
for a non-scoped (normal) access that also "conflicts" with a current
scoped access. In a nested interrupt (or in the scheduler), which shares
the same kcsan_ctx, we cannot check scoped accesses set up in the parent
context -- simply ignore them in this case.

With the introduction of kcsan_ctx::disable_scoped, we can also clean up
kcsan_check_scoped_accesses()'s recursion guard, and do not need to
modify the list's prev pointer.

Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/kcsan.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h
index fc266ecb2a4d..13cef3458fed 100644
--- a/include/linux/kcsan.h
+++ b/include/linux/kcsan.h
@@ -21,6 +21,7 @@
  */
 struct kcsan_ctx {
 	int disable_count; /* disable counter */
+	int disable_scoped; /* disable scoped access counter */
 	int atomic_next; /* number of following atomic ops */
 
 	/*
-- 
cgit v1.2.3


From 69562e4983d93e2791c0bf128b07462afbd7f4dc Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Thu, 5 Aug 2021 14:57:45 +0200
Subject: kcsan: Add core support for a subset of weak memory modeling

Add support for modeling a subset of weak memory, which will enable
detection of a subset of data races due to missing memory barriers.

KCSAN's approach to detecting missing memory barriers is based on
modeling access reordering, and enabled if `CONFIG_KCSAN_WEAK_MEMORY=y`,
which depends on `CONFIG_KCSAN_STRICT=y`. The feature can be enabled or
disabled at boot and runtime via the `kcsan.weak_memory` boot parameter.

Each memory access for which a watchpoint is set up, is also selected
for simulated reordering within the scope of its function (at most 1
in-flight access).

We are limited to modeling the effects of "buffering" (delaying the
access), since the runtime cannot "prefetch" accesses (therefore no
acquire modeling). Once an access has been selected for reordering, it
is checked along every other access until the end of the function scope.
If an appropriate memory barrier is encountered, the access will no
longer be considered for reordering.

When the result of a memory operation should be ordered by a barrier,
KCSAN can then detect data races where the conflict only occurs as a
result of a missing barrier due to reordering accesses.

Suggested-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/kcsan-checks.h | 10 +++++++++-
 include/linux/kcsan.h        | 10 +++++++++-
 include/linux/sched.h        |  3 +++
 3 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h
index 5f5965246877..a1c6a89fde71 100644
--- a/include/linux/kcsan-checks.h
+++ b/include/linux/kcsan-checks.h
@@ -99,7 +99,15 @@ void kcsan_set_access_mask(unsigned long mask);
 
 /* Scoped access information. */
 struct kcsan_scoped_access {
-	struct list_head list;
+	union {
+		struct list_head list; /* scoped_accesses list */
+		/*
+		 * Not an entry in scoped_accesses list; stack depth from where
+		 * the access was initialized.
+		 */
+		int stack_depth;
+	};
+
 	/* Access information. */
 	const volatile void *ptr;
 	size_t size;
diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h
index 13cef3458fed..c07c71f5ba4f 100644
--- a/include/linux/kcsan.h
+++ b/include/linux/kcsan.h
@@ -49,8 +49,16 @@ struct kcsan_ctx {
 	 */
 	unsigned long access_mask;
 
-	/* List of scoped accesses. */
+	/* List of scoped accesses; likely to be empty. */
 	struct list_head scoped_accesses;
+
+#ifdef CONFIG_KCSAN_WEAK_MEMORY
+	/*
+	 * Scoped access for modeling access reordering to detect missing memory
+	 * barriers; only keep 1 to keep fast-path complexity manageable.
+	 */
+	struct kcsan_scoped_access reorder_access;
+#endif
 };
 
 /**
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78c351e35fec..0cd40b010487 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1339,6 +1339,9 @@ struct task_struct {
 #ifdef CONFIG_TRACE_IRQFLAGS
 	struct irqtrace_events		kcsan_save_irqtrace;
 #endif
+#ifdef CONFIG_KCSAN_WEAK_MEMORY
+	int				kcsan_stack_depth;
+#endif
 #endif
 
 #if IS_ENABLED(CONFIG_KUNIT)
-- 
cgit v1.2.3


From 0b8b0830ac1419d7250fde31ea78793a03f3db44 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Tue, 30 Nov 2021 12:44:13 +0100
Subject: kcsan: Add core memory barrier instrumentation functions

Add the core memory barrier instrumentation functions. These invalidate
the current in-flight reordered access based on the rules for the
respective barrier types and in-flight access type.

To obtain barrier instrumentation that can be disabled via __no_kcsan
with appropriate compiler-support (and not just with objtool help),
barrier instrumentation repurposes __atomic_signal_fence(), instead of
inserting explicit calls. Crucially, __atomic_signal_fence() normally
does not map to any real instructions, but is still intercepted by
fsanitize=thread. As a result, like any other instrumentation done by
the compiler, barrier instrumentation can be disabled with __no_kcsan.

Unfortunately Clang and GCC currently differ in their __no_kcsan aka
__no_sanitize_thread behaviour with respect to builtin atomics (and
__tsan_func_{entry,exit}) instrumentation. This is already reflected in
Kconfig.kcsan's dependencies for KCSAN_WEAK_MEMORY. A later change will
introduce support for newer versions of Clang that can implement
__no_kcsan to also remove the additional instrumentation introduced by
KCSAN_WEAK_MEMORY.

Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/kcsan-checks.h | 71 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 69 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h
index a1c6a89fde71..9d2c869167f2 100644
--- a/include/linux/kcsan-checks.h
+++ b/include/linux/kcsan-checks.h
@@ -36,6 +36,36 @@
  */
 void __kcsan_check_access(const volatile void *ptr, size_t size, int type);
 
+/*
+ * See definition of __tsan_atomic_signal_fence() in kernel/kcsan/core.c.
+ * Note: The mappings are arbitrary, and do not reflect any real mappings of C11
+ * memory orders to the LKMM memory orders and vice-versa!
+ */
+#define __KCSAN_BARRIER_TO_SIGNAL_FENCE_mb	__ATOMIC_SEQ_CST
+#define __KCSAN_BARRIER_TO_SIGNAL_FENCE_wmb	__ATOMIC_ACQ_REL
+#define __KCSAN_BARRIER_TO_SIGNAL_FENCE_rmb	__ATOMIC_ACQUIRE
+#define __KCSAN_BARRIER_TO_SIGNAL_FENCE_release	__ATOMIC_RELEASE
+
+/**
+ * __kcsan_mb - full memory barrier instrumentation
+ */
+void __kcsan_mb(void);
+
+/**
+ * __kcsan_wmb - write memory barrier instrumentation
+ */
+void __kcsan_wmb(void);
+
+/**
+ * __kcsan_rmb - read memory barrier instrumentation
+ */
+void __kcsan_rmb(void);
+
+/**
+ * __kcsan_release - release barrier instrumentation
+ */
+void __kcsan_release(void);
+
 /**
  * kcsan_disable_current - disable KCSAN for the current context
  *
@@ -159,6 +189,10 @@ void kcsan_end_scoped_access(struct kcsan_scoped_access *sa);
 static inline void __kcsan_check_access(const volatile void *ptr, size_t size,
 					int type) { }
 
+static inline void __kcsan_mb(void)			{ }
+static inline void __kcsan_wmb(void)			{ }
+static inline void __kcsan_rmb(void)			{ }
+static inline void __kcsan_release(void)		{ }
 static inline void kcsan_disable_current(void)		{ }
 static inline void kcsan_enable_current(void)		{ }
 static inline void kcsan_enable_current_nowarn(void)	{ }
@@ -191,12 +225,45 @@ static inline void kcsan_end_scoped_access(struct kcsan_scoped_access *sa) { }
  */
 #define __kcsan_disable_current kcsan_disable_current
 #define __kcsan_enable_current kcsan_enable_current_nowarn
-#else
+#else /* __SANITIZE_THREAD__ */
 static inline void kcsan_check_access(const volatile void *ptr, size_t size,
 				      int type) { }
 static inline void __kcsan_enable_current(void)  { }
 static inline void __kcsan_disable_current(void) { }
-#endif
+#endif /* __SANITIZE_THREAD__ */
+
+#if defined(CONFIG_KCSAN_WEAK_MEMORY) && defined(__SANITIZE_THREAD__)
+/*
+ * Normal barrier instrumentation is not done via explicit calls, but by mapping
+ * to a repurposed __atomic_signal_fence(), which normally does not generate any
+ * real instructions, but is still intercepted by fsanitize=thread. This means,
+ * like any other compile-time instrumentation, barrier instrumentation can be
+ * disabled with the __no_kcsan function attribute.
+ *
+ * Also see definition of __tsan_atomic_signal_fence() in kernel/kcsan/core.c.
+ */
+#define __KCSAN_BARRIER_TO_SIGNAL_FENCE(name)					\
+	static __always_inline void kcsan_##name(void)				\
+	{									\
+		barrier();							\
+		__atomic_signal_fence(__KCSAN_BARRIER_TO_SIGNAL_FENCE_##name);	\
+		barrier();							\
+	}
+__KCSAN_BARRIER_TO_SIGNAL_FENCE(mb)
+__KCSAN_BARRIER_TO_SIGNAL_FENCE(wmb)
+__KCSAN_BARRIER_TO_SIGNAL_FENCE(rmb)
+__KCSAN_BARRIER_TO_SIGNAL_FENCE(release)
+#elif defined(CONFIG_KCSAN_WEAK_MEMORY) && defined(__KCSAN_INSTRUMENT_BARRIERS__)
+#define kcsan_mb	__kcsan_mb
+#define kcsan_wmb	__kcsan_wmb
+#define kcsan_rmb	__kcsan_rmb
+#define kcsan_release	__kcsan_release
+#else /* CONFIG_KCSAN_WEAK_MEMORY && ... */
+static inline void kcsan_mb(void)		{ }
+static inline void kcsan_wmb(void)		{ }
+static inline void kcsan_rmb(void)		{ }
+static inline void kcsan_release(void)		{ }
+#endif /* CONFIG_KCSAN_WEAK_MEMORY && ... */
 
 /**
  * __kcsan_check_read - check regular read access for races
-- 
cgit v1.2.3


From f948666de517cf8ebef7cb2c9b2d669dec4bfe2e Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Tue, 30 Nov 2021 12:44:22 +0100
Subject: locking/barriers, kcsan: Add instrumentation for barriers

Adds the required KCSAN instrumentation for barriers if CONFIG_SMP.
KCSAN supports modeling the effects of:

	smp_mb()
	smp_rmb()
	smp_wmb()
	smp_store_release()

Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/asm-generic/barrier.h | 29 +++++++++++++++--------------
 include/linux/spinlock.h      |  2 +-
 2 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 640f09479bdf..27a9c9edfef6 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -14,6 +14,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/compiler.h>
+#include <linux/kcsan-checks.h>
 #include <asm/rwonce.h>
 
 #ifndef nop
@@ -62,15 +63,15 @@
 #ifdef CONFIG_SMP
 
 #ifndef smp_mb
-#define smp_mb()	__smp_mb()
+#define smp_mb()	do { kcsan_mb(); __smp_mb(); } while (0)
 #endif
 
 #ifndef smp_rmb
-#define smp_rmb()	__smp_rmb()
+#define smp_rmb()	do { kcsan_rmb(); __smp_rmb(); } while (0)
 #endif
 
 #ifndef smp_wmb
-#define smp_wmb()	__smp_wmb()
+#define smp_wmb()	do { kcsan_wmb(); __smp_wmb(); } while (0)
 #endif
 
 #else	/* !CONFIG_SMP */
@@ -123,19 +124,19 @@ do {									\
 #ifdef CONFIG_SMP
 
 #ifndef smp_store_mb
-#define smp_store_mb(var, value)  __smp_store_mb(var, value)
+#define smp_store_mb(var, value)  do { kcsan_mb(); __smp_store_mb(var, value); } while (0)
 #endif
 
 #ifndef smp_mb__before_atomic
-#define smp_mb__before_atomic()	__smp_mb__before_atomic()
+#define smp_mb__before_atomic()	do { kcsan_mb(); __smp_mb__before_atomic(); } while (0)
 #endif
 
 #ifndef smp_mb__after_atomic
-#define smp_mb__after_atomic()	__smp_mb__after_atomic()
+#define smp_mb__after_atomic()	do { kcsan_mb(); __smp_mb__after_atomic(); } while (0)
 #endif
 
 #ifndef smp_store_release
-#define smp_store_release(p, v) __smp_store_release(p, v)
+#define smp_store_release(p, v) do { kcsan_release(); __smp_store_release(p, v); } while (0)
 #endif
 
 #ifndef smp_load_acquire
@@ -178,13 +179,13 @@ do {									\
 #endif	/* CONFIG_SMP */
 
 /* Barriers for virtual machine guests when talking to an SMP host */
-#define virt_mb() __smp_mb()
-#define virt_rmb() __smp_rmb()
-#define virt_wmb() __smp_wmb()
-#define virt_store_mb(var, value) __smp_store_mb(var, value)
-#define virt_mb__before_atomic() __smp_mb__before_atomic()
-#define virt_mb__after_atomic()	__smp_mb__after_atomic()
-#define virt_store_release(p, v) __smp_store_release(p, v)
+#define virt_mb() do { kcsan_mb(); __smp_mb(); } while (0)
+#define virt_rmb() do { kcsan_rmb(); __smp_rmb(); } while (0)
+#define virt_wmb() do { kcsan_wmb(); __smp_wmb(); } while (0)
+#define virt_store_mb(var, value) do { kcsan_mb(); __smp_store_mb(var, value); } while (0)
+#define virt_mb__before_atomic() do { kcsan_mb(); __smp_mb__before_atomic(); } while (0)
+#define virt_mb__after_atomic()	do { kcsan_mb(); __smp_mb__after_atomic(); } while (0)
+#define virt_store_release(p, v) do { kcsan_release(); __smp_store_release(p, v); } while (0)
 #define virt_load_acquire(p) __smp_load_acquire(p)
 
 /**
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index b4e5ca23f840..5c0c5174155d 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -171,7 +171,7 @@ do {									\
  * Architectures that can implement ACQUIRE better need to take care.
  */
 #ifndef smp_mb__after_spinlock
-#define smp_mb__after_spinlock()	do { } while (0)
+#define smp_mb__after_spinlock()	kcsan_mb()
 #endif
 
 #ifdef CONFIG_DEBUG_SPINLOCK
-- 
cgit v1.2.3


From 2505a51ac6f249956735e0a369e2404f96eebef0 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Tue, 30 Nov 2021 12:44:23 +0100
Subject: locking/barriers, kcsan: Support generic instrumentation

Thus far only smp_*() barriers had been defined by asm-generic/barrier.h
based on __smp_*() barriers, because the !SMP case is usually generic.

With the introduction of instrumentation, it also makes sense to have
asm-generic/barrier.h assist in the definition of instrumented versions
of mb(), rmb(), wmb(), dma_rmb(), and dma_wmb().

Because there is no requirement to distinguish the !SMP case, the
definition can be simpler: we can avoid also providing fallbacks for the
__ prefixed cases, and only check if `defined(__<barrier>)`, to finally
define the KCSAN-instrumented versions.

This also allows for the compiler to complain if an architecture
accidentally defines both the normal and __ prefixed variant.

Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/asm-generic/barrier.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 27a9c9edfef6..02c4339c8eeb 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -21,6 +21,31 @@
 #define nop()	asm volatile ("nop")
 #endif
 
+/*
+ * Architectures that want generic instrumentation can define __ prefixed
+ * variants of all barriers.
+ */
+
+#ifdef __mb
+#define mb()	do { kcsan_mb(); __mb(); } while (0)
+#endif
+
+#ifdef __rmb
+#define rmb()	do { kcsan_rmb(); __rmb(); } while (0)
+#endif
+
+#ifdef __wmb
+#define wmb()	do { kcsan_wmb(); __wmb(); } while (0)
+#endif
+
+#ifdef __dma_rmb
+#define dma_rmb()	do { kcsan_rmb(); __dma_rmb(); } while (0)
+#endif
+
+#ifdef __dma_wmb
+#define dma_wmb()	do { kcsan_wmb(); __dma_wmb(); } while (0)
+#endif
+
 /*
  * Force strict CPU ordering. And yes, this is required on UP too when we're
  * talking to devices.
-- 
cgit v1.2.3


From e87c4f6642f49627c3430cb3ee78c73fb51b48e4 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Tue, 30 Nov 2021 12:44:24 +0100
Subject: locking/atomics, kcsan: Add instrumentation for barriers

Adds the required KCSAN instrumentation for barriers of atomics.

Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/atomic/atomic-instrumented.h | 135 ++++++++++++++++++++++++++++-
 1 file changed, 134 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h
index a0f654370da3..5d69b143c28e 100644
--- a/include/linux/atomic/atomic-instrumented.h
+++ b/include/linux/atomic/atomic-instrumented.h
@@ -45,6 +45,7 @@ atomic_set(atomic_t *v, int i)
 static __always_inline void
 atomic_set_release(atomic_t *v, int i)
 {
+	kcsan_release();
 	instrument_atomic_write(v, sizeof(*v));
 	arch_atomic_set_release(v, i);
 }
@@ -59,6 +60,7 @@ atomic_add(int i, atomic_t *v)
 static __always_inline int
 atomic_add_return(int i, atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_add_return(i, v);
 }
@@ -73,6 +75,7 @@ atomic_add_return_acquire(int i, atomic_t *v)
 static __always_inline int
 atomic_add_return_release(int i, atomic_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_add_return_release(i, v);
 }
@@ -87,6 +90,7 @@ atomic_add_return_relaxed(int i, atomic_t *v)
 static __always_inline int
 atomic_fetch_add(int i, atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_add(i, v);
 }
@@ -101,6 +105,7 @@ atomic_fetch_add_acquire(int i, atomic_t *v)
 static __always_inline int
 atomic_fetch_add_release(int i, atomic_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_add_release(i, v);
 }
@@ -122,6 +127,7 @@ atomic_sub(int i, atomic_t *v)
 static __always_inline int
 atomic_sub_return(int i, atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_sub_return(i, v);
 }
@@ -136,6 +142,7 @@ atomic_sub_return_acquire(int i, atomic_t *v)
 static __always_inline int
 atomic_sub_return_release(int i, atomic_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_sub_return_release(i, v);
 }
@@ -150,6 +157,7 @@ atomic_sub_return_relaxed(int i, atomic_t *v)
 static __always_inline int
 atomic_fetch_sub(int i, atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_sub(i, v);
 }
@@ -164,6 +172,7 @@ atomic_fetch_sub_acquire(int i, atomic_t *v)
 static __always_inline int
 atomic_fetch_sub_release(int i, atomic_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_sub_release(i, v);
 }
@@ -185,6 +194,7 @@ atomic_inc(atomic_t *v)
 static __always_inline int
 atomic_inc_return(atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_inc_return(v);
 }
@@ -199,6 +209,7 @@ atomic_inc_return_acquire(atomic_t *v)
 static __always_inline int
 atomic_inc_return_release(atomic_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_inc_return_release(v);
 }
@@ -213,6 +224,7 @@ atomic_inc_return_relaxed(atomic_t *v)
 static __always_inline int
 atomic_fetch_inc(atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_inc(v);
 }
@@ -227,6 +239,7 @@ atomic_fetch_inc_acquire(atomic_t *v)
 static __always_inline int
 atomic_fetch_inc_release(atomic_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_inc_release(v);
 }
@@ -248,6 +261,7 @@ atomic_dec(atomic_t *v)
 static __always_inline int
 atomic_dec_return(atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_dec_return(v);
 }
@@ -262,6 +276,7 @@ atomic_dec_return_acquire(atomic_t *v)
 static __always_inline int
 atomic_dec_return_release(atomic_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_dec_return_release(v);
 }
@@ -276,6 +291,7 @@ atomic_dec_return_relaxed(atomic_t *v)
 static __always_inline int
 atomic_fetch_dec(atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_dec(v);
 }
@@ -290,6 +306,7 @@ atomic_fetch_dec_acquire(atomic_t *v)
 static __always_inline int
 atomic_fetch_dec_release(atomic_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_dec_release(v);
 }
@@ -311,6 +328,7 @@ atomic_and(int i, atomic_t *v)
 static __always_inline int
 atomic_fetch_and(int i, atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_and(i, v);
 }
@@ -325,6 +343,7 @@ atomic_fetch_and_acquire(int i, atomic_t *v)
 static __always_inline int
 atomic_fetch_and_release(int i, atomic_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_and_release(i, v);
 }
@@ -346,6 +365,7 @@ atomic_andnot(int i, atomic_t *v)
 static __always_inline int
 atomic_fetch_andnot(int i, atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_andnot(i, v);
 }
@@ -360,6 +380,7 @@ atomic_fetch_andnot_acquire(int i, atomic_t *v)
 static __always_inline int
 atomic_fetch_andnot_release(int i, atomic_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_andnot_release(i, v);
 }
@@ -381,6 +402,7 @@ atomic_or(int i, atomic_t *v)
 static __always_inline int
 atomic_fetch_or(int i, atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_or(i, v);
 }
@@ -395,6 +417,7 @@ atomic_fetch_or_acquire(int i, atomic_t *v)
 static __always_inline int
 atomic_fetch_or_release(int i, atomic_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_or_release(i, v);
 }
@@ -416,6 +439,7 @@ atomic_xor(int i, atomic_t *v)
 static __always_inline int
 atomic_fetch_xor(int i, atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_xor(i, v);
 }
@@ -430,6 +454,7 @@ atomic_fetch_xor_acquire(int i, atomic_t *v)
 static __always_inline int
 atomic_fetch_xor_release(int i, atomic_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_xor_release(i, v);
 }
@@ -444,6 +469,7 @@ atomic_fetch_xor_relaxed(int i, atomic_t *v)
 static __always_inline int
 atomic_xchg(atomic_t *v, int i)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_xchg(v, i);
 }
@@ -458,6 +484,7 @@ atomic_xchg_acquire(atomic_t *v, int i)
 static __always_inline int
 atomic_xchg_release(atomic_t *v, int i)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_xchg_release(v, i);
 }
@@ -472,6 +499,7 @@ atomic_xchg_relaxed(atomic_t *v, int i)
 static __always_inline int
 atomic_cmpxchg(atomic_t *v, int old, int new)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_cmpxchg(v, old, new);
 }
@@ -486,6 +514,7 @@ atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
 static __always_inline int
 atomic_cmpxchg_release(atomic_t *v, int old, int new)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_cmpxchg_release(v, old, new);
 }
@@ -500,6 +529,7 @@ atomic_cmpxchg_relaxed(atomic_t *v, int old, int new)
 static __always_inline bool
 atomic_try_cmpxchg(atomic_t *v, int *old, int new)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	instrument_atomic_read_write(old, sizeof(*old));
 	return arch_atomic_try_cmpxchg(v, old, new);
@@ -516,6 +546,7 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
 static __always_inline bool
 atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	instrument_atomic_read_write(old, sizeof(*old));
 	return arch_atomic_try_cmpxchg_release(v, old, new);
@@ -532,6 +563,7 @@ atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
 static __always_inline bool
 atomic_sub_and_test(int i, atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_sub_and_test(i, v);
 }
@@ -539,6 +571,7 @@ atomic_sub_and_test(int i, atomic_t *v)
 static __always_inline bool
 atomic_dec_and_test(atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_dec_and_test(v);
 }
@@ -546,6 +579,7 @@ atomic_dec_and_test(atomic_t *v)
 static __always_inline bool
 atomic_inc_and_test(atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_inc_and_test(v);
 }
@@ -553,6 +587,7 @@ atomic_inc_and_test(atomic_t *v)
 static __always_inline bool
 atomic_add_negative(int i, atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_add_negative(i, v);
 }
@@ -560,6 +595,7 @@ atomic_add_negative(int i, atomic_t *v)
 static __always_inline int
 atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_add_unless(v, a, u);
 }
@@ -567,6 +603,7 @@ atomic_fetch_add_unless(atomic_t *v, int a, int u)
 static __always_inline bool
 atomic_add_unless(atomic_t *v, int a, int u)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_add_unless(v, a, u);
 }
@@ -574,6 +611,7 @@ atomic_add_unless(atomic_t *v, int a, int u)
 static __always_inline bool
 atomic_inc_not_zero(atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_inc_not_zero(v);
 }
@@ -581,6 +619,7 @@ atomic_inc_not_zero(atomic_t *v)
 static __always_inline bool
 atomic_inc_unless_negative(atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_inc_unless_negative(v);
 }
@@ -588,6 +627,7 @@ atomic_inc_unless_negative(atomic_t *v)
 static __always_inline bool
 atomic_dec_unless_positive(atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_dec_unless_positive(v);
 }
@@ -595,6 +635,7 @@ atomic_dec_unless_positive(atomic_t *v)
 static __always_inline int
 atomic_dec_if_positive(atomic_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_dec_if_positive(v);
 }
@@ -623,6 +664,7 @@ atomic64_set(atomic64_t *v, s64 i)
 static __always_inline void
 atomic64_set_release(atomic64_t *v, s64 i)
 {
+	kcsan_release();
 	instrument_atomic_write(v, sizeof(*v));
 	arch_atomic64_set_release(v, i);
 }
@@ -637,6 +679,7 @@ atomic64_add(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_add_return(s64 i, atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_add_return(i, v);
 }
@@ -651,6 +694,7 @@ atomic64_add_return_acquire(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_add_return_release(s64 i, atomic64_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_add_return_release(i, v);
 }
@@ -665,6 +709,7 @@ atomic64_add_return_relaxed(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_fetch_add(s64 i, atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_add(i, v);
 }
@@ -679,6 +724,7 @@ atomic64_fetch_add_acquire(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_fetch_add_release(s64 i, atomic64_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_add_release(i, v);
 }
@@ -700,6 +746,7 @@ atomic64_sub(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_sub_return(s64 i, atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_sub_return(i, v);
 }
@@ -714,6 +761,7 @@ atomic64_sub_return_acquire(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_sub_return_release(s64 i, atomic64_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_sub_return_release(i, v);
 }
@@ -728,6 +776,7 @@ atomic64_sub_return_relaxed(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_fetch_sub(s64 i, atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_sub(i, v);
 }
@@ -742,6 +791,7 @@ atomic64_fetch_sub_acquire(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_fetch_sub_release(s64 i, atomic64_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_sub_release(i, v);
 }
@@ -763,6 +813,7 @@ atomic64_inc(atomic64_t *v)
 static __always_inline s64
 atomic64_inc_return(atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_inc_return(v);
 }
@@ -777,6 +828,7 @@ atomic64_inc_return_acquire(atomic64_t *v)
 static __always_inline s64
 atomic64_inc_return_release(atomic64_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_inc_return_release(v);
 }
@@ -791,6 +843,7 @@ atomic64_inc_return_relaxed(atomic64_t *v)
 static __always_inline s64
 atomic64_fetch_inc(atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_inc(v);
 }
@@ -805,6 +858,7 @@ atomic64_fetch_inc_acquire(atomic64_t *v)
 static __always_inline s64
 atomic64_fetch_inc_release(atomic64_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_inc_release(v);
 }
@@ -826,6 +880,7 @@ atomic64_dec(atomic64_t *v)
 static __always_inline s64
 atomic64_dec_return(atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_dec_return(v);
 }
@@ -840,6 +895,7 @@ atomic64_dec_return_acquire(atomic64_t *v)
 static __always_inline s64
 atomic64_dec_return_release(atomic64_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_dec_return_release(v);
 }
@@ -854,6 +910,7 @@ atomic64_dec_return_relaxed(atomic64_t *v)
 static __always_inline s64
 atomic64_fetch_dec(atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_dec(v);
 }
@@ -868,6 +925,7 @@ atomic64_fetch_dec_acquire(atomic64_t *v)
 static __always_inline s64
 atomic64_fetch_dec_release(atomic64_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_dec_release(v);
 }
@@ -889,6 +947,7 @@ atomic64_and(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_fetch_and(s64 i, atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_and(i, v);
 }
@@ -903,6 +962,7 @@ atomic64_fetch_and_acquire(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_fetch_and_release(s64 i, atomic64_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_and_release(i, v);
 }
@@ -924,6 +984,7 @@ atomic64_andnot(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_fetch_andnot(s64 i, atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_andnot(i, v);
 }
@@ -938,6 +999,7 @@ atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_andnot_release(i, v);
 }
@@ -959,6 +1021,7 @@ atomic64_or(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_fetch_or(s64 i, atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_or(i, v);
 }
@@ -973,6 +1036,7 @@ atomic64_fetch_or_acquire(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_fetch_or_release(s64 i, atomic64_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_or_release(i, v);
 }
@@ -994,6 +1058,7 @@ atomic64_xor(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_fetch_xor(s64 i, atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_xor(i, v);
 }
@@ -1008,6 +1073,7 @@ atomic64_fetch_xor_acquire(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_fetch_xor_release(s64 i, atomic64_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_xor_release(i, v);
 }
@@ -1022,6 +1088,7 @@ atomic64_fetch_xor_relaxed(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_xchg(atomic64_t *v, s64 i)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_xchg(v, i);
 }
@@ -1036,6 +1103,7 @@ atomic64_xchg_acquire(atomic64_t *v, s64 i)
 static __always_inline s64
 atomic64_xchg_release(atomic64_t *v, s64 i)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_xchg_release(v, i);
 }
@@ -1050,6 +1118,7 @@ atomic64_xchg_relaxed(atomic64_t *v, s64 i)
 static __always_inline s64
 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_cmpxchg(v, old, new);
 }
@@ -1064,6 +1133,7 @@ atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
 static __always_inline s64
 atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_cmpxchg_release(v, old, new);
 }
@@ -1078,6 +1148,7 @@ atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new)
 static __always_inline bool
 atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	instrument_atomic_read_write(old, sizeof(*old));
 	return arch_atomic64_try_cmpxchg(v, old, new);
@@ -1094,6 +1165,7 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
 static __always_inline bool
 atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	instrument_atomic_read_write(old, sizeof(*old));
 	return arch_atomic64_try_cmpxchg_release(v, old, new);
@@ -1110,6 +1182,7 @@ atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
 static __always_inline bool
 atomic64_sub_and_test(s64 i, atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_sub_and_test(i, v);
 }
@@ -1117,6 +1190,7 @@ atomic64_sub_and_test(s64 i, atomic64_t *v)
 static __always_inline bool
 atomic64_dec_and_test(atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_dec_and_test(v);
 }
@@ -1124,6 +1198,7 @@ atomic64_dec_and_test(atomic64_t *v)
 static __always_inline bool
 atomic64_inc_and_test(atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_inc_and_test(v);
 }
@@ -1131,6 +1206,7 @@ atomic64_inc_and_test(atomic64_t *v)
 static __always_inline bool
 atomic64_add_negative(s64 i, atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_add_negative(i, v);
 }
@@ -1138,6 +1214,7 @@ atomic64_add_negative(s64 i, atomic64_t *v)
 static __always_inline s64
 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_add_unless(v, a, u);
 }
@@ -1145,6 +1222,7 @@ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 static __always_inline bool
 atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_add_unless(v, a, u);
 }
@@ -1152,6 +1230,7 @@ atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
 static __always_inline bool
 atomic64_inc_not_zero(atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_inc_not_zero(v);
 }
@@ -1159,6 +1238,7 @@ atomic64_inc_not_zero(atomic64_t *v)
 static __always_inline bool
 atomic64_inc_unless_negative(atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_inc_unless_negative(v);
 }
@@ -1166,6 +1246,7 @@ atomic64_inc_unless_negative(atomic64_t *v)
 static __always_inline bool
 atomic64_dec_unless_positive(atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_dec_unless_positive(v);
 }
@@ -1173,6 +1254,7 @@ atomic64_dec_unless_positive(atomic64_t *v)
 static __always_inline s64
 atomic64_dec_if_positive(atomic64_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_dec_if_positive(v);
 }
@@ -1201,6 +1283,7 @@ atomic_long_set(atomic_long_t *v, long i)
 static __always_inline void
 atomic_long_set_release(atomic_long_t *v, long i)
 {
+	kcsan_release();
 	instrument_atomic_write(v, sizeof(*v));
 	arch_atomic_long_set_release(v, i);
 }
@@ -1215,6 +1298,7 @@ atomic_long_add(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_add_return(long i, atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_add_return(i, v);
 }
@@ -1229,6 +1313,7 @@ atomic_long_add_return_acquire(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_add_return_release(long i, atomic_long_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_add_return_release(i, v);
 }
@@ -1243,6 +1328,7 @@ atomic_long_add_return_relaxed(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_fetch_add(long i, atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_fetch_add(i, v);
 }
@@ -1257,6 +1343,7 @@ atomic_long_fetch_add_acquire(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_fetch_add_release(long i, atomic_long_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_fetch_add_release(i, v);
 }
@@ -1278,6 +1365,7 @@ atomic_long_sub(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_sub_return(long i, atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_sub_return(i, v);
 }
@@ -1292,6 +1380,7 @@ atomic_long_sub_return_acquire(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_sub_return_release(long i, atomic_long_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_sub_return_release(i, v);
 }
@@ -1306,6 +1395,7 @@ atomic_long_sub_return_relaxed(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_fetch_sub(long i, atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_fetch_sub(i, v);
 }
@@ -1320,6 +1410,7 @@ atomic_long_fetch_sub_acquire(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_fetch_sub_release(long i, atomic_long_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_fetch_sub_release(i, v);
 }
@@ -1341,6 +1432,7 @@ atomic_long_inc(atomic_long_t *v)
 static __always_inline long
 atomic_long_inc_return(atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_inc_return(v);
 }
@@ -1355,6 +1447,7 @@ atomic_long_inc_return_acquire(atomic_long_t *v)
 static __always_inline long
 atomic_long_inc_return_release(atomic_long_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_inc_return_release(v);
 }
@@ -1369,6 +1462,7 @@ atomic_long_inc_return_relaxed(atomic_long_t *v)
 static __always_inline long
 atomic_long_fetch_inc(atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_fetch_inc(v);
 }
@@ -1383,6 +1477,7 @@ atomic_long_fetch_inc_acquire(atomic_long_t *v)
 static __always_inline long
 atomic_long_fetch_inc_release(atomic_long_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_fetch_inc_release(v);
 }
@@ -1404,6 +1499,7 @@ atomic_long_dec(atomic_long_t *v)
 static __always_inline long
 atomic_long_dec_return(atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_dec_return(v);
 }
@@ -1418,6 +1514,7 @@ atomic_long_dec_return_acquire(atomic_long_t *v)
 static __always_inline long
 atomic_long_dec_return_release(atomic_long_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_dec_return_release(v);
 }
@@ -1432,6 +1529,7 @@ atomic_long_dec_return_relaxed(atomic_long_t *v)
 static __always_inline long
 atomic_long_fetch_dec(atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_fetch_dec(v);
 }
@@ -1446,6 +1544,7 @@ atomic_long_fetch_dec_acquire(atomic_long_t *v)
 static __always_inline long
 atomic_long_fetch_dec_release(atomic_long_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_fetch_dec_release(v);
 }
@@ -1467,6 +1566,7 @@ atomic_long_and(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_fetch_and(long i, atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_fetch_and(i, v);
 }
@@ -1481,6 +1581,7 @@ atomic_long_fetch_and_acquire(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_fetch_and_release(long i, atomic_long_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_fetch_and_release(i, v);
 }
@@ -1502,6 +1603,7 @@ atomic_long_andnot(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_fetch_andnot(long i, atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_fetch_andnot(i, v);
 }
@@ -1516,6 +1618,7 @@ atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_fetch_andnot_release(long i, atomic_long_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_fetch_andnot_release(i, v);
 }
@@ -1537,6 +1640,7 @@ atomic_long_or(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_fetch_or(long i, atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_fetch_or(i, v);
 }
@@ -1551,6 +1655,7 @@ atomic_long_fetch_or_acquire(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_fetch_or_release(long i, atomic_long_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_fetch_or_release(i, v);
 }
@@ -1572,6 +1677,7 @@ atomic_long_xor(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_fetch_xor(long i, atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_fetch_xor(i, v);
 }
@@ -1586,6 +1692,7 @@ atomic_long_fetch_xor_acquire(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_fetch_xor_release(long i, atomic_long_t *v)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_fetch_xor_release(i, v);
 }
@@ -1600,6 +1707,7 @@ atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_xchg(atomic_long_t *v, long i)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_xchg(v, i);
 }
@@ -1614,6 +1722,7 @@ atomic_long_xchg_acquire(atomic_long_t *v, long i)
 static __always_inline long
 atomic_long_xchg_release(atomic_long_t *v, long i)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_xchg_release(v, i);
 }
@@ -1628,6 +1737,7 @@ atomic_long_xchg_relaxed(atomic_long_t *v, long i)
 static __always_inline long
 atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_cmpxchg(v, old, new);
 }
@@ -1642,6 +1752,7 @@ atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
 static __always_inline long
 atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_cmpxchg_release(v, old, new);
 }
@@ -1656,6 +1767,7 @@ atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
 static __always_inline bool
 atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	instrument_atomic_read_write(old, sizeof(*old));
 	return arch_atomic_long_try_cmpxchg(v, old, new);
@@ -1672,6 +1784,7 @@ atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
 static __always_inline bool
 atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
 {
+	kcsan_release();
 	instrument_atomic_read_write(v, sizeof(*v));
 	instrument_atomic_read_write(old, sizeof(*old));
 	return arch_atomic_long_try_cmpxchg_release(v, old, new);
@@ -1688,6 +1801,7 @@ atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new)
 static __always_inline bool
 atomic_long_sub_and_test(long i, atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_sub_and_test(i, v);
 }
@@ -1695,6 +1809,7 @@ atomic_long_sub_and_test(long i, atomic_long_t *v)
 static __always_inline bool
 atomic_long_dec_and_test(atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_dec_and_test(v);
 }
@@ -1702,6 +1817,7 @@ atomic_long_dec_and_test(atomic_long_t *v)
 static __always_inline bool
 atomic_long_inc_and_test(atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_inc_and_test(v);
 }
@@ -1709,6 +1825,7 @@ atomic_long_inc_and_test(atomic_long_t *v)
 static __always_inline bool
 atomic_long_add_negative(long i, atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_add_negative(i, v);
 }
@@ -1716,6 +1833,7 @@ atomic_long_add_negative(long i, atomic_long_t *v)
 static __always_inline long
 atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_fetch_add_unless(v, a, u);
 }
@@ -1723,6 +1841,7 @@ atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
 static __always_inline bool
 atomic_long_add_unless(atomic_long_t *v, long a, long u)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_add_unless(v, a, u);
 }
@@ -1730,6 +1849,7 @@ atomic_long_add_unless(atomic_long_t *v, long a, long u)
 static __always_inline bool
 atomic_long_inc_not_zero(atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_inc_not_zero(v);
 }
@@ -1737,6 +1857,7 @@ atomic_long_inc_not_zero(atomic_long_t *v)
 static __always_inline bool
 atomic_long_inc_unless_negative(atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_inc_unless_negative(v);
 }
@@ -1744,6 +1865,7 @@ atomic_long_inc_unless_negative(atomic_long_t *v)
 static __always_inline bool
 atomic_long_dec_unless_positive(atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_dec_unless_positive(v);
 }
@@ -1751,6 +1873,7 @@ atomic_long_dec_unless_positive(atomic_long_t *v)
 static __always_inline long
 atomic_long_dec_if_positive(atomic_long_t *v)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_long_dec_if_positive(v);
 }
@@ -1758,6 +1881,7 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 #define xchg(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
+	kcsan_mb(); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_xchg(__ai_ptr, __VA_ARGS__); \
 })
@@ -1772,6 +1896,7 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 #define xchg_release(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
+	kcsan_release(); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_xchg_release(__ai_ptr, __VA_ARGS__); \
 })
@@ -1786,6 +1911,7 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 #define cmpxchg(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
+	kcsan_mb(); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_cmpxchg(__ai_ptr, __VA_ARGS__); \
 })
@@ -1800,6 +1926,7 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 #define cmpxchg_release(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
+	kcsan_release(); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_cmpxchg_release(__ai_ptr, __VA_ARGS__); \
 })
@@ -1814,6 +1941,7 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 #define cmpxchg64(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
+	kcsan_mb(); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_cmpxchg64(__ai_ptr, __VA_ARGS__); \
 })
@@ -1828,6 +1956,7 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 #define cmpxchg64_release(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
+	kcsan_release(); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_cmpxchg64_release(__ai_ptr, __VA_ARGS__); \
 })
@@ -1843,6 +1972,7 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
 	typeof(oldp) __ai_oldp = (oldp); \
+	kcsan_mb(); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
 	arch_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
@@ -1861,6 +1991,7 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
 	typeof(oldp) __ai_oldp = (oldp); \
+	kcsan_release(); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
 	arch_try_cmpxchg_release(__ai_ptr, __ai_oldp, __VA_ARGS__); \
@@ -1892,6 +2023,7 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 #define sync_cmpxchg(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
+	kcsan_mb(); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_sync_cmpxchg(__ai_ptr, __VA_ARGS__); \
 })
@@ -1899,6 +2031,7 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 #define cmpxchg_double(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
+	kcsan_mb(); \
 	instrument_atomic_write(__ai_ptr, 2 * sizeof(*__ai_ptr)); \
 	arch_cmpxchg_double(__ai_ptr, __VA_ARGS__); \
 })
@@ -1912,4 +2045,4 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 })
 
 #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
-// 2a9553f0a9d5619f19151092df5cabbbf16ce835
+// 87c974b93032afd42143613434d1a7788fa598f9
-- 
cgit v1.2.3


From 04def1b9b4a3d27ef80e7fbf1b17f1897beb1ce4 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Tue, 30 Nov 2021 12:44:25 +0100
Subject: asm-generic/bitops, kcsan: Add instrumentation for barriers

Adds the required KCSAN instrumentation for barriers of atomic bitops.

Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/asm-generic/bitops/instrumented-atomic.h | 3 +++
 include/asm-generic/bitops/instrumented-lock.h   | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/bitops/instrumented-atomic.h b/include/asm-generic/bitops/instrumented-atomic.h
index 81915dcd4b4e..c90192b1c755 100644
--- a/include/asm-generic/bitops/instrumented-atomic.h
+++ b/include/asm-generic/bitops/instrumented-atomic.h
@@ -67,6 +67,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr)
  */
 static inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long));
 	return arch_test_and_set_bit(nr, addr);
 }
@@ -80,6 +81,7 @@ static inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
  */
 static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long));
 	return arch_test_and_clear_bit(nr, addr);
 }
@@ -93,6 +95,7 @@ static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
  */
 static inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
 {
+	kcsan_mb();
 	instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long));
 	return arch_test_and_change_bit(nr, addr);
 }
diff --git a/include/asm-generic/bitops/instrumented-lock.h b/include/asm-generic/bitops/instrumented-lock.h
index 75ef606f7145..eb64bd4f11f3 100644
--- a/include/asm-generic/bitops/instrumented-lock.h
+++ b/include/asm-generic/bitops/instrumented-lock.h
@@ -22,6 +22,7 @@
  */
 static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
+	kcsan_release();
 	instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
 	arch_clear_bit_unlock(nr, addr);
 }
@@ -37,6 +38,7 @@ static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
  */
 static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
+	kcsan_release();
 	instrument_write(addr + BIT_WORD(nr), sizeof(long));
 	arch___clear_bit_unlock(nr, addr);
 }
@@ -71,6 +73,7 @@ static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr)
 static inline bool
 clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
 {
+	kcsan_release();
 	instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
 	return arch_clear_bit_unlock_is_negative_byte(nr, addr);
 }
-- 
cgit v1.2.3


From a015b7085979b12e55f67f3b86be0321fff6be3f Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Tue, 30 Nov 2021 12:44:32 +0100
Subject: compiler_attributes.h: Add __disable_sanitizer_instrumentation

The new attribute maps to
__attribute__((disable_sanitizer_instrumentation)), which will be
supported by Clang >= 14.0. Future support in GCC is also possible.

This attribute disables compiler instrumentation for kernel sanitizer
tools, making it easier to implement noinstr. It is different from the
existing __no_sanitize* attributes, which may still allow certain types
of instrumentation to prevent false positives.

Signed-off-by: Alexander Potapenko <glider@google.com>
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/compiler_attributes.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index b9121afd8733..37e260020221 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -308,6 +308,24 @@
 # define __compiletime_warning(msg)
 #endif
 
+/*
+ * Optional: only supported since clang >= 14.0
+ *
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#disable-sanitizer-instrumentation
+ *
+ * disable_sanitizer_instrumentation is not always similar to
+ * no_sanitize((<sanitizer-name>)): the latter may still let specific sanitizers
+ * insert code into functions to prevent false positives. Unlike that,
+ * disable_sanitizer_instrumentation prevents all kinds of instrumentation to
+ * functions with the attribute.
+ */
+#if __has_attribute(disable_sanitizer_instrumentation)
+# define __disable_sanitizer_instrumentation \
+	 __attribute__((disable_sanitizer_instrumentation))
+#else
+# define __disable_sanitizer_instrumentation
+#endif
+
 /*
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute
-- 
cgit v1.2.3


From bd3d5bd1a0ad386475ea7a3de8a91e7d8a600536 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Tue, 30 Nov 2021 12:44:33 +0100
Subject: kcsan: Support WEAK_MEMORY with Clang where no objtool support exists

Clang and GCC behave a little differently when it comes to the
__no_sanitize_thread attribute, which has valid reasons, and depending
on context either one could be right.

Traditionally, user space ThreadSanitizer [1] still expects instrumented
builtin atomics (to avoid false positives) and __tsan_func_{entry,exit}
(to generate meaningful stack traces), even if the function has the
attribute no_sanitize("thread").

[1] https://clang.llvm.org/docs/ThreadSanitizer.html#attribute-no-sanitize-thread

GCC doesn't follow the same policy (for better or worse), and removes
all kinds of instrumentation if no_sanitize is added. Arguably, since
this may be a problem for user space ThreadSanitizer, we expect this may
change in future.

Since KCSAN != ThreadSanitizer, the likelihood of false positives even
without barrier instrumentation everywhere, is much lower by design.

At least for Clang, however, to fully remove all sanitizer
instrumentation, we must add the disable_sanitizer_instrumentation
attribute, which is available since Clang 14.0.

Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/compiler_types.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 1d32f4c03c9e..3c1795fdb568 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -198,9 +198,20 @@ struct ftrace_likely_data {
 # define __no_kasan_or_inline __always_inline
 #endif
 
-#define __no_kcsan __no_sanitize_thread
 #ifdef __SANITIZE_THREAD__
+/*
+ * Clang still emits instrumentation for __tsan_func_{entry,exit}() and builtin
+ * atomics even with __no_sanitize_thread (to avoid false positives in userspace
+ * ThreadSanitizer). The kernel's requirements are stricter and we really do not
+ * want any instrumentation with __no_kcsan.
+ *
+ * Therefore we add __disable_sanitizer_instrumentation where available to
+ * disable all instrumentation. See Kconfig.kcsan where this is mandatory.
+ */
+# define __no_kcsan __no_sanitize_thread __disable_sanitizer_instrumentation
 # define __no_sanitize_or_inline __no_kcsan notrace __maybe_unused
+#else
+# define __no_kcsan
 #endif
 
 #ifndef __no_sanitize_or_inline
-- 
cgit v1.2.3


From 80d7476fa20a3cc83f76b3b02a7575891d1e7511 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Sat, 4 Dec 2021 13:57:03 +0100
Subject: kcsan: Turn barrier instrumentation into macros

Some architectures use barriers in 'extern inline' functions, from which
we should not refer to static inline functions.

For example, building Alpha with gcc and W=1 shows:

./include/asm-generic/barrier.h:70:30: warning: 'kcsan_rmb' is static but used in inline function 'pmd_offset' which is not static
   70 | #define smp_rmb()       do { kcsan_rmb(); __smp_rmb(); } while (0)
      |                              ^~~~~~~~~
./arch/alpha/include/asm/pgtable.h:293:9: note: in expansion of macro 'smp_rmb'
  293 |         smp_rmb(); /* see above */
      |         ^~~~~~~

Which seems to warn about 6.7.4#3 of the C standard:
  "An inline definition of a function with external linkage shall not
   contain a definition of a modifiable object with static or thread
   storage duration, and shall not contain a reference to an identifier
   with internal linkage."

Fix it by turning barrier instrumentation into macros, which matches
definitions in <asm/barrier.h>.

Perhaps we can revert this change in future, when there are no more
'extern inline' users left.

Link: https://lkml.kernel.org/r/202112041334.X44uWZXf-lkp@intel.com
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/kcsan-checks.h | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h
index 9d2c869167f2..92f3843d9ebb 100644
--- a/include/linux/kcsan-checks.h
+++ b/include/linux/kcsan-checks.h
@@ -241,28 +241,30 @@ static inline void __kcsan_disable_current(void) { }
  * disabled with the __no_kcsan function attribute.
  *
  * Also see definition of __tsan_atomic_signal_fence() in kernel/kcsan/core.c.
+ *
+ * These are all macros, like <asm/barrier.h>, since some architectures use them
+ * in non-static inline functions.
  */
 #define __KCSAN_BARRIER_TO_SIGNAL_FENCE(name)					\
-	static __always_inline void kcsan_##name(void)				\
-	{									\
+	do {									\
 		barrier();							\
 		__atomic_signal_fence(__KCSAN_BARRIER_TO_SIGNAL_FENCE_##name);	\
 		barrier();							\
-	}
-__KCSAN_BARRIER_TO_SIGNAL_FENCE(mb)
-__KCSAN_BARRIER_TO_SIGNAL_FENCE(wmb)
-__KCSAN_BARRIER_TO_SIGNAL_FENCE(rmb)
-__KCSAN_BARRIER_TO_SIGNAL_FENCE(release)
+	} while (0)
+#define kcsan_mb()	__KCSAN_BARRIER_TO_SIGNAL_FENCE(mb)
+#define kcsan_wmb()	__KCSAN_BARRIER_TO_SIGNAL_FENCE(wmb)
+#define kcsan_rmb()	__KCSAN_BARRIER_TO_SIGNAL_FENCE(rmb)
+#define kcsan_release()	__KCSAN_BARRIER_TO_SIGNAL_FENCE(release)
 #elif defined(CONFIG_KCSAN_WEAK_MEMORY) && defined(__KCSAN_INSTRUMENT_BARRIERS__)
 #define kcsan_mb	__kcsan_mb
 #define kcsan_wmb	__kcsan_wmb
 #define kcsan_rmb	__kcsan_rmb
 #define kcsan_release	__kcsan_release
 #else /* CONFIG_KCSAN_WEAK_MEMORY && ... */
-static inline void kcsan_mb(void)		{ }
-static inline void kcsan_wmb(void)		{ }
-static inline void kcsan_rmb(void)		{ }
-static inline void kcsan_release(void)		{ }
+#define kcsan_mb()	do { } while (0)
+#define kcsan_wmb()	do { } while (0)
+#define kcsan_rmb()	do { } while (0)
+#define kcsan_release()	do { } while (0)
 #endif /* CONFIG_KCSAN_WEAK_MEMORY && ... */
 
 /**
-- 
cgit v1.2.3


From db67097aa6f2587b44055f2e16db72a11e17faef Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 9 Dec 2021 21:31:56 -0700
Subject: pktdvd: stop using bdi congestion framework.

The bdi congestion framework isn't widely used and should be
deprecated.

pktdvd makes use of it to track congestion, but this can be done
entirely internally to pktdvd, so it doesn't need to use the framework.

So introduce a "congested" flag.  When waiting for bio_queue_size to
drop, set this flag and a var_waitqueue() to wait for it.  When
bio_queue_size does drop and this flag is set, clear the flag and call
wake_up_var().

We don't use a wait_var_event macro for the waiting as we need to set
the flag and drop the spinlock before calling schedule() and while that
is possible with __wait_var_event(), result is not easy to read.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/163910843527.9928.857338663717630212@noble.neil.brown.name
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/pktcdvd.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h
index 174601554b06..c391e694aa26 100644
--- a/include/linux/pktcdvd.h
+++ b/include/linux/pktcdvd.h
@@ -183,6 +183,8 @@ struct pktcdvd_device
 	spinlock_t		lock;		/* Serialize access to bio_queue */
 	struct rb_root		bio_queue;	/* Work queue of bios we need to handle */
 	int			bio_queue_size;	/* Number of nodes in bio_queue */
+	bool			congested;	/* Someone is waiting for bio_queue_size
+						 * to drop. */
 	sector_t		current_sector;	/* Keep track of where the elevator is */
 	atomic_t		scan_queue;	/* Set to non-zero when pkt_handle_queue */
 						/* needs to be run. */
-- 
cgit v1.2.3


From f95711242390d759f69fd67ad46b31491fe904d6 Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Wed, 8 Dec 2021 17:43:53 +0000
Subject: EDAC: Add RDDR5 and LRDDR5 memory types

Include Registered-DDR5 and Load-Reduced DDR5 in the list of memory
types.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20211208174356.1997855-2-yazen.ghannam@amd.com
---
 include/linux/edac.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/edac.h b/include/linux/edac.h
index 4207d06996a4..e730b3468719 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -182,6 +182,8 @@ static inline char *mc_event_error_type(const unsigned int err_type)
  * @MEM_LRDDR4:		Load-Reduced DDR4 memory.
  * @MEM_LPDDR4:		Low-Power DDR4 memory.
  * @MEM_DDR5:		Unbuffered DDR5 RAM
+ * @MEM_RDDR5:		Registered DDR5 RAM
+ * @MEM_LRDDR5:		Load-Reduced DDR5 memory.
  * @MEM_NVDIMM:		Non-volatile RAM
  * @MEM_WIO2:		Wide I/O 2.
  * @MEM_HBM2:		High bandwidth Memory Gen 2.
@@ -211,6 +213,8 @@ enum mem_type {
 	MEM_LRDDR4,
 	MEM_LPDDR4,
 	MEM_DDR5,
+	MEM_RDDR5,
+	MEM_LRDDR5,
 	MEM_NVDIMM,
 	MEM_WIO2,
 	MEM_HBM2,
@@ -239,6 +243,8 @@ enum mem_type {
 #define MEM_FLAG_LRDDR4         BIT(MEM_LRDDR4)
 #define MEM_FLAG_LPDDR4         BIT(MEM_LPDDR4)
 #define MEM_FLAG_DDR5           BIT(MEM_DDR5)
+#define MEM_FLAG_RDDR5          BIT(MEM_RDDR5)
+#define MEM_FLAG_LRDDR5         BIT(MEM_LRDDR5)
 #define MEM_FLAG_NVDIMM         BIT(MEM_NVDIMM)
 #define MEM_FLAG_WIO2		BIT(MEM_WIO2)
 #define MEM_FLAG_HBM2		BIT(MEM_HBM2)
-- 
cgit v1.2.3


From 1614b2b11fab29dd4ff31ebba9d266961f5af69e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 29 Nov 2021 14:28:41 +0000
Subject: arch: Make ARCH_STACKWALK independent of STACKTRACE

Make arch_stack_walk() available for ARCH_STACKWALK architectures
without it being entangled in STACKTRACE.

Link: https://lore.kernel.org/lkml/20211022152104.356586621@infradead.org/
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[Mark: rebase, drop unnecessary arm change]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Link: https://lore.kernel.org/r/20211129142849.3056714-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/linux/stacktrace.h | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index bef158815e83..97455880ac41 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -8,22 +8,6 @@
 struct task_struct;
 struct pt_regs;
 
-#ifdef CONFIG_STACKTRACE
-void stack_trace_print(const unsigned long *trace, unsigned int nr_entries,
-		       int spaces);
-int stack_trace_snprint(char *buf, size_t size, const unsigned long *entries,
-			unsigned int nr_entries, int spaces);
-unsigned int stack_trace_save(unsigned long *store, unsigned int size,
-			      unsigned int skipnr);
-unsigned int stack_trace_save_tsk(struct task_struct *task,
-				  unsigned long *store, unsigned int size,
-				  unsigned int skipnr);
-unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store,
-				   unsigned int size, unsigned int skipnr);
-unsigned int stack_trace_save_user(unsigned long *store, unsigned int size);
-unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries);
-
-/* Internal interfaces. Do not use in generic code */
 #ifdef CONFIG_ARCH_STACKWALK
 
 /**
@@ -76,8 +60,25 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, void *cookie,
 
 void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
 			  const struct pt_regs *regs);
+#endif /* CONFIG_ARCH_STACKWALK */
 
-#else /* CONFIG_ARCH_STACKWALK */
+#ifdef CONFIG_STACKTRACE
+void stack_trace_print(const unsigned long *trace, unsigned int nr_entries,
+		       int spaces);
+int stack_trace_snprint(char *buf, size_t size, const unsigned long *entries,
+			unsigned int nr_entries, int spaces);
+unsigned int stack_trace_save(unsigned long *store, unsigned int size,
+			      unsigned int skipnr);
+unsigned int stack_trace_save_tsk(struct task_struct *task,
+				  unsigned long *store, unsigned int size,
+				  unsigned int skipnr);
+unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store,
+				   unsigned int size, unsigned int skipnr);
+unsigned int stack_trace_save_user(unsigned long *store, unsigned int size);
+unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries);
+
+#ifndef CONFIG_ARCH_STACKWALK
+/* Internal interfaces. Do not use in generic code */
 struct stack_trace {
 	unsigned int nr_entries, max_entries;
 	unsigned long *entries;
-- 
cgit v1.2.3


From 9ba74e6c9e9d0c5c1e5792a7111fc7d1a0589cb8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 9 Dec 2021 23:44:21 -0800
Subject: net: add networking namespace refcount tracker

We have 100+ syzbot reports about netns being dismantled too soon,
still unresolved as of today.

We think a missing get_net() or an extra put_net() is the root cause.

In order to find the bug(s), and be able to spot future ones,
this patch adds CONFIG_NET_NS_REFCNT_TRACKER and new helpers
to precisely pair all put_net() with corresponding get_net().

To use these helpers, each data structure owning a refcount
should also use a "netns_tracker" to pair the get and put.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h   |  9 +--------
 include/net/net_namespace.h | 34 ++++++++++++++++++++++++++++++++++
 include/net/net_trackers.h  | 18 ++++++++++++++++++
 3 files changed, 53 insertions(+), 8 deletions(-)
 create mode 100644 include/net/net_trackers.h

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1a748ee9a421..235d5d082f1a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -48,7 +48,7 @@
 #include <uapi/linux/pkt_cls.h>
 #include <linux/hashtable.h>
 #include <linux/rbtree.h>
-#include <linux/ref_tracker.h>
+#include <net/net_trackers.h>
 
 struct netpoll_info;
 struct device;
@@ -300,13 +300,6 @@ enum netdev_state_t {
 	__LINK_STATE_TESTING,
 };
 
-
-#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
-typedef struct ref_tracker *netdevice_tracker;
-#else
-typedef struct {} netdevice_tracker;
-#endif
-
 struct gro_list {
 	struct list_head	list;
 	int			count;
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index bb5fa5914032..5b61c462e534 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -34,6 +34,7 @@
 #include <net/netns/smc.h>
 #include <net/netns/bpf.h>
 #include <net/netns/mctp.h>
+#include <net/net_trackers.h>
 #include <linux/ns_common.h>
 #include <linux/idr.h>
 #include <linux/skbuff.h>
@@ -87,6 +88,7 @@ struct net {
 	struct idr		netns_ids;
 
 	struct ns_common	ns;
+	struct ref_tracker_dir  refcnt_tracker;
 
 	struct list_head 	dev_base_head;
 	struct proc_dir_entry 	*proc_net;
@@ -240,6 +242,7 @@ void ipx_unregister_sysctl(void);
 #ifdef CONFIG_NET_NS
 void __put_net(struct net *net);
 
+/* Try using get_net_track() instead */
 static inline struct net *get_net(struct net *net)
 {
 	refcount_inc(&net->ns.count);
@@ -258,6 +261,7 @@ static inline struct net *maybe_get_net(struct net *net)
 	return net;
 }
 
+/* Try using put_net_track() instead */
 static inline void put_net(struct net *net)
 {
 	if (refcount_dec_and_test(&net->ns.count))
@@ -308,6 +312,36 @@ static inline int check_net(const struct net *net)
 #endif
 
 
+static inline void netns_tracker_alloc(struct net *net,
+				       netns_tracker *tracker, gfp_t gfp)
+{
+#ifdef CONFIG_NET_NS_REFCNT_TRACKER
+	ref_tracker_alloc(&net->refcnt_tracker, tracker, gfp);
+#endif
+}
+
+static inline void netns_tracker_free(struct net *net,
+				      netns_tracker *tracker)
+{
+#ifdef CONFIG_NET_NS_REFCNT_TRACKER
+       ref_tracker_free(&net->refcnt_tracker, tracker);
+#endif
+}
+
+static inline struct net *get_net_track(struct net *net,
+					netns_tracker *tracker, gfp_t gfp)
+{
+	get_net(net);
+	netns_tracker_alloc(net, tracker, gfp);
+	return net;
+}
+
+static inline void put_net_track(struct net *net, netns_tracker *tracker)
+{
+	netns_tracker_free(net, tracker);
+	put_net(net);
+}
+
 typedef struct {
 #ifdef CONFIG_NET_NS
 	struct net *net;
diff --git a/include/net/net_trackers.h b/include/net/net_trackers.h
new file mode 100644
index 000000000000..d94c76cf15a9
--- /dev/null
+++ b/include/net/net_trackers.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_NET_TRACKERS_H
+#define __NET_NET_TRACKERS_H
+#include <linux/ref_tracker.h>
+
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+typedef struct ref_tracker *netdevice_tracker;
+#else
+typedef struct {} netdevice_tracker;
+#endif
+
+#ifdef CONFIG_NET_NS_REFCNT_TRACKER
+typedef struct ref_tracker *netns_tracker;
+#else
+typedef struct {} netns_tracker;
+#endif
+
+#endif /* __NET_NET_TRACKERS_H */
-- 
cgit v1.2.3


From ffa84b5ffb37a957d6062385112ab1069f760de6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 9 Dec 2021 23:44:22 -0800
Subject: net: add netns refcount tracker to struct sock

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sock.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index ae61cd0b650d..5d8532f26208 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -350,6 +350,7 @@ struct bpf_local_storage;
   *	@sk_txtime_deadline_mode: set deadline mode for SO_TXTIME
   *	@sk_txtime_report_errors: set report errors mode for SO_TXTIME
   *	@sk_txtime_unused: unused txtime flags
+  *	@ns_tracker: tracker for netns reference
   */
 struct sock {
 	/*
@@ -538,6 +539,7 @@ struct sock {
 	struct bpf_local_storage __rcu	*sk_bpf_storage;
 #endif
 	struct rcu_head		sk_rcu;
+	netns_tracker		ns_tracker;
 };
 
 enum sk_pacing {
-- 
cgit v1.2.3


From 04a931e58d1944ab3d1e11fdfde1947fbe5b6a37 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 9 Dec 2021 23:44:23 -0800
Subject: net: add netns refcount tracker to struct seq_net_private

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/seq_file_net.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/seq_file_net.h b/include/linux/seq_file_net.h
index 0fdbe1ddd8d1..b97912fdbae7 100644
--- a/include/linux/seq_file_net.h
+++ b/include/linux/seq_file_net.h
@@ -9,7 +9,8 @@ extern struct net init_net;
 
 struct seq_net_private {
 #ifdef CONFIG_NET_NS
-	struct net *net;
+	struct net	*net;
+	netns_tracker	ns_tracker;
 #endif
 };
 
-- 
cgit v1.2.3


From dbdcda634ce384938805549bd1b3f3eaed50af5e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 9 Dec 2021 23:44:24 -0800
Subject: net: sched: add netns refcount tracker to struct tcf_exts

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/pkt_cls.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 193f88ebf629..cebc1bd713b6 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -202,7 +202,8 @@ struct tcf_exts {
 	__u32	type; /* for backward compat(TCA_OLD_COMPAT) */
 	int nr_actions;
 	struct tc_action **actions;
-	struct net *net;
+	struct net	*net;
+	netns_tracker	ns_tracker;
 #endif
 	/* Map to export classifier specific extension TLV types to the
 	 * generic extensions API. Unsupported extensions must be set to 0.
@@ -218,6 +219,7 @@ static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net,
 	exts->type = 0;
 	exts->nr_actions = 0;
 	exts->net = net;
+	netns_tracker_alloc(net, &exts->ns_tracker, GFP_KERNEL);
 	exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
 				GFP_KERNEL);
 	if (!exts->actions)
@@ -236,6 +238,8 @@ static inline bool tcf_exts_get_net(struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	exts->net = maybe_get_net(exts->net);
+	if (exts->net)
+		netns_tracker_alloc(exts->net, &exts->ns_tracker, GFP_KERNEL);
 	return exts->net != NULL;
 #else
 	return true;
@@ -246,7 +250,7 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	if (exts->net)
-		put_net(exts->net);
+		put_net_track(exts->net, &exts->ns_tracker);
 #endif
 }
 
-- 
cgit v1.2.3


From e1b539bd73a76dc8a7bf82befe6eac4ae79c76b3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 9 Dec 2021 07:44:51 -0800
Subject: xfrm: add net device refcount tracker to struct xfrm_state_offload

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Link: https://lore.kernel.org/r/20211209154451.4184050-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/xfrm.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 2308210793a0..83b46da8873d 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -128,6 +128,7 @@ struct xfrm_state_walk {
 
 struct xfrm_state_offload {
 	struct net_device	*dev;
+	netdevice_tracker	dev_tracker;
 	struct net_device	*real_dev;
 	unsigned long		offload_handle;
 	unsigned int		num_exthdrs;
@@ -1913,7 +1914,7 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
 		if (dev->xfrmdev_ops->xdo_dev_state_free)
 			dev->xfrmdev_ops->xdo_dev_state_free(x);
 		xso->dev = NULL;
-		dev_put(dev);
+		dev_put_track(dev, &xso->dev_tracker);
 	}
 }
 #else
-- 
cgit v1.2.3


From 65c7cdedeb3026fabcc967a7aae2f755ad4d0783 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 3 Sep 2021 11:24:17 -0400
Subject: genirq: Provide new interfaces for affinity hints

The discussion about removing the side effect of irq_set_affinity_hint() of
actually applying the cpumask (if not NULL) as affinity to the interrupt,
unearthed a few unpleasantries:

  1) The modular perf drivers rely on the current behaviour for the very
     wrong reasons.

  2) While none of the other drivers prevents user space from changing
     the affinity, a cursorily inspection shows that there are at least
     expectations in some drivers.

#1 needs to be cleaned up anyway, so that's not a problem

#2 might result in subtle regressions especially when irqbalanced (which
   nowadays ignores the affinity hint) is disabled.

Provide new interfaces:

  irq_update_affinity_hint()  - Only sets the affinity hint pointer
  irq_set_affinity_and_hint() - Set the pointer and apply the affinity to
                                the interrupt

Make irq_set_affinity_hint() a wrapper around irq_apply_affinity_hint() and
document it to be phased out.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Nitesh Narayan Lal <nitesh@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210501021832.743094-1-jesse.brandeburg@intel.com
Link: https://lore.kernel.org/r/20210903152430.244937-2-nitesh@redhat.com
---
 include/linux/interrupt.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 1f22a30c0963..9367f1cb2e3c 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -329,7 +329,46 @@ extern int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask);
 extern int irq_can_set_affinity(unsigned int irq);
 extern int irq_select_affinity(unsigned int irq);
 
-extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m);
+extern int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m,
+				     bool setaffinity);
+
+/**
+ * irq_update_affinity_hint - Update the affinity hint
+ * @irq:	Interrupt to update
+ * @m:		cpumask pointer (NULL to clear the hint)
+ *
+ * Updates the affinity hint, but does not change the affinity of the interrupt.
+ */
+static inline int
+irq_update_affinity_hint(unsigned int irq, const struct cpumask *m)
+{
+	return __irq_apply_affinity_hint(irq, m, false);
+}
+
+/**
+ * irq_set_affinity_and_hint - Update the affinity hint and apply the provided
+ *			     cpumask to the interrupt
+ * @irq:	Interrupt to update
+ * @m:		cpumask pointer (NULL to clear the hint)
+ *
+ * Updates the affinity hint and if @m is not NULL it applies it as the
+ * affinity of that interrupt.
+ */
+static inline int
+irq_set_affinity_and_hint(unsigned int irq, const struct cpumask *m)
+{
+	return __irq_apply_affinity_hint(irq, m, true);
+}
+
+/*
+ * Deprecated. Use irq_update_affinity_hint() or irq_set_affinity_and_hint()
+ * instead.
+ */
+static inline int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
+{
+	return irq_set_affinity_and_hint(irq, m);
+}
+
 extern int irq_update_affinity_desc(unsigned int irq,
 				    struct irq_affinity_desc *affinity);
 
@@ -361,6 +400,18 @@ static inline int irq_can_set_affinity(unsigned int irq)
 
 static inline int irq_select_affinity(unsigned int irq)  { return 0; }
 
+static inline int irq_update_affinity_hint(unsigned int irq,
+					   const struct cpumask *m)
+{
+	return -EINVAL;
+}
+
+static inline int irq_set_affinity_and_hint(unsigned int irq,
+					    const struct cpumask *m)
+{
+	return -EINVAL;
+}
+
 static inline int irq_set_affinity_hint(unsigned int irq,
 					const struct cpumask *m)
 {
-- 
cgit v1.2.3


From bd984c03097b8e9b7500cba7378040ac1c697dbb Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Fri, 19 Nov 2021 11:20:33 -0800
Subject: f2fs: show more DIO information in tracepoint

This prints more information of DIO in tracepoint.

Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/trace/events/f2fs.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index dcb94d740e12..f701bb23f83c 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -936,14 +936,14 @@ TRACE_EVENT(f2fs_fallocate,
 
 TRACE_EVENT(f2fs_direct_IO_enter,
 
-	TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw),
+	TP_PROTO(struct inode *inode, struct kiocb *iocb, long len, int rw),
 
-	TP_ARGS(inode, offset, len, rw),
+	TP_ARGS(inode, iocb, len, rw),
 
 	TP_STRUCT__entry(
 		__field(dev_t,	dev)
 		__field(ino_t,	ino)
-		__field(loff_t,	pos)
+		__field(struct kiocb *,	iocb)
 		__field(unsigned long,	len)
 		__field(int,	rw)
 	),
@@ -951,15 +951,18 @@ TRACE_EVENT(f2fs_direct_IO_enter,
 	TP_fast_assign(
 		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
-		__entry->pos	= offset;
+		__entry->iocb	= iocb;
 		__entry->len	= len;
 		__entry->rw	= rw;
 	),
 
-	TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu rw = %d",
+	TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu ki_flags = %x ki_hint = %x ki_ioprio = %x rw = %d",
 		show_dev_ino(__entry),
-		__entry->pos,
+		__entry->iocb->ki_pos,
 		__entry->len,
+		__entry->iocb->ki_flags,
+		__entry->iocb->ki_hint,
+		__entry->iocb->ki_ioprio,
 		__entry->rw)
 );
 
-- 
cgit v1.2.3


From 9dcc38e2813e0cd3b195940c98b181ce6ede8f20 Mon Sep 17 00:00:00 2001
From: Drew DeVault <sir@cmpwn.com>
Date: Fri, 10 Dec 2021 14:46:09 -0800
Subject: Increase default MLOCK_LIMIT to 8 MiB

This limit has not been updated since 2008, when it was increased to 64
KiB at the request of GnuPG.  Until recently, the main use-cases for this
feature were (1) preventing sensitive memory from being swapped, as in
GnuPG's use-case; and (2) real-time use-cases.  In the first case, little
memory is called for, and in the second case, the user is generally in a
position to increase it if they need more.

The introduction of IOURING_REGISTER_BUFFERS adds a third use-case:
preparing fixed buffers for high-performance I/O.  This use-case will take
as much of this memory as it can get, but is still limited to 64 KiB by
default, which is very little.  This increases the limit to 8 MB, which
was chosen fairly arbitrarily as a more generous, but still conservative,
default value.

It is also possible to raise this limit in userspace.  This is easily
done, for example, in the use-case of a network daemon: systemd, for
instance, provides for this via LimitMEMLOCK in the service file; OpenRC
via the rc_ulimit variables.  However, there is no established userspace
facility for configuring this outside of daemons: end-user applications do
not presently have access to a convenient means of raising their limits.

The buck, as it were, stops with the kernel.  It's much easier to address
it here than it is to bring it to hundreds of distributions, and it can
only realistically be relied upon to be high-enough by end-user software
if it is more-or-less ubiquitous.  Most distros don't change this
particular rlimit from the kernel-supplied default value, so a change here
will easily provide that ubiquity.

Link: https://lkml.kernel.org/r/20211028080813.15966-1-sir@cmpwn.com
Signed-off-by: Drew DeVault <sir@cmpwn.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Acked-by: Cyril Hrubis <chrubis@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Pavel Begunkov <asml.silence@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Andrew Dona-Couch <andrew@donacou.ch>
Cc: Ammar Faizi <ammarfaizi2@gnuweeb.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/resource.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/resource.h b/include/uapi/linux/resource.h
index 74ef57b38f9f..ac5d6a3031db 100644
--- a/include/uapi/linux/resource.h
+++ b/include/uapi/linux/resource.h
@@ -66,10 +66,17 @@ struct rlimit64 {
 #define _STK_LIM	(8*1024*1024)
 
 /*
- * GPG2 wants 64kB of mlocked memory, to make sure pass phrases
- * and other sensitive information are never written to disk.
+ * Limit the amount of locked memory by some sane default:
+ * root can always increase this limit if needed.
+ *
+ * The main use-cases are (1) preventing sensitive memory
+ * from being swapped; (2) real-time operations; (3) via
+ * IOURING_REGISTER_BUFFERS.
+ *
+ * The first two don't need much. The latter will take as
+ * much as it can get. 8MB is a reasonably sane default.
  */
-#define MLOCK_LIMIT	((PAGE_SIZE > 64*1024) ? PAGE_SIZE : 64*1024)
+#define MLOCK_LIMIT	(8*1024*1024)
 
 /*
  * Due to binary compatibility, the actual resource numbers
-- 
cgit v1.2.3


From e4779015fd5d2fb8390c258268addff24d6077c7 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 10 Dec 2021 14:46:22 -0800
Subject: timers: implement usleep_idle_range()

Patch series "mm/damon: Fix fake /proc/loadavg reports", v3.

This patchset fixes DAMON's fake load report issue.  The first patch
makes yet another variant of usleep_range() for this fix, and the second
patch fixes the issue of DAMON by making it using the newly introduced
function.

This patch (of 2):

Some kernel threads such as DAMON could need to repeatedly sleep in
micro seconds level.  Because usleep_range() sleeps in uninterruptible
state, however, such threads would make /proc/loadavg reports fake load.

To help such cases, this commit implements a variant of usleep_range()
called usleep_idle_range().  It is same to usleep_range() but sets the
state of the current task as TASK_IDLE while sleeping.

Link: https://lkml.kernel.org/r/20211126145015.15862-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20211126145015.15862-2-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Cc: John Stultz <john.stultz@linaro.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/delay.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/delay.h b/include/linux/delay.h
index 8eacf67eb212..039e7e0c7378 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -20,6 +20,7 @@
  */
 
 #include <linux/math.h>
+#include <linux/sched.h>
 
 extern unsigned long loops_per_jiffy;
 
@@ -58,7 +59,18 @@ void calibrate_delay(void);
 void __attribute__((weak)) calibration_delay_done(void);
 void msleep(unsigned int msecs);
 unsigned long msleep_interruptible(unsigned int msecs);
-void usleep_range(unsigned long min, unsigned long max);
+void usleep_range_state(unsigned long min, unsigned long max,
+			unsigned int state);
+
+static inline void usleep_range(unsigned long min, unsigned long max)
+{
+	usleep_range_state(min, max, TASK_UNINTERRUPTIBLE);
+}
+
+static inline void usleep_idle_range(unsigned long min, unsigned long max)
+{
+	usleep_range_state(min, max, TASK_IDLE);
+}
 
 static inline void ssleep(unsigned int seconds)
 {
-- 
cgit v1.2.3


From 33d60fbd21fa6f71a88571209e301ec6de59f81b Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Wed, 8 Dec 2021 15:21:58 +0900
Subject: sock: Use sock_owned_by_user_nocheck() instead of sk_lock.owned.

This patch moves sock_release_ownership() down in include/net/sock.h and
replaces some sk_lock.owned tests with sock_owned_by_user_nocheck().

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Link: https://lore.kernel.org/r/20211208062158.54132-1-kuniyu@amazon.co.jp
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sock.h | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 5d8532f26208..37f878564d25 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1635,16 +1635,6 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
 		__sk_mem_reclaim(sk, SK_RECLAIM_CHUNK);
 }
 
-static inline void sock_release_ownership(struct sock *sk)
-{
-	if (sk->sk_lock.owned) {
-		sk->sk_lock.owned = 0;
-
-		/* The sk_lock has mutex_unlock() semantics: */
-		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
-	}
-}
-
 /*
  * Macro so as to not evaluate some arguments when
  * lockdep is not enabled.
@@ -1771,12 +1761,23 @@ static inline bool sock_owned_by_user_nocheck(const struct sock *sk)
 	return sk->sk_lock.owned;
 }
 
+static inline void sock_release_ownership(struct sock *sk)
+{
+	if (sock_owned_by_user_nocheck(sk)) {
+		sk->sk_lock.owned = 0;
+
+		/* The sk_lock has mutex_unlock() semantics: */
+		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
+	}
+}
+
 /* no reclassification while locks are held */
 static inline bool sock_allow_reclassification(const struct sock *csk)
 {
 	struct sock *sk = (struct sock *)csk;
 
-	return !sk->sk_lock.owned && !spin_is_locked(&sk->sk_lock.slock);
+	return !sock_owned_by_user_nocheck(sk) &&
+		!spin_is_locked(&sk->sk_lock.slock);
 }
 
 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
-- 
cgit v1.2.3


From 840ece19e9f246a1b15308ae76b68aaf7a3a9433 Mon Sep 17 00:00:00 2001
From: Colin Foster <colin.foster@in-advantage.com>
Date: Wed, 8 Dec 2021 23:40:10 -0800
Subject: net: ocelot: fix missed include in the vsc7514_regs.h file

commit 32ecd22ba60b ("net: mscc: ocelot: split register definitions to a
separate file") left out an include for <soc/mscc/ocelot_vcap.h>. It was
missed because the only consumer was ocelot_vsc7514.h, which already
included ocelot_vcap.

Fixes: 32ecd22ba60b ("net: mscc: ocelot: split register definitions to a separate file")
Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20211209074010.1813010-1-colin.foster@in-advantage.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/soc/mscc/vsc7514_regs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/soc/mscc/vsc7514_regs.h b/include/soc/mscc/vsc7514_regs.h
index 98743e252012..ceee26c96959 100644
--- a/include/soc/mscc/vsc7514_regs.h
+++ b/include/soc/mscc/vsc7514_regs.h
@@ -8,6 +8,8 @@
 #ifndef VSC7514_REGS_H
 #define VSC7514_REGS_H
 
+#include <soc/mscc/ocelot_vcap.h>
+
 extern const u32 vsc7514_ana_regmap[];
 extern const u32 vsc7514_qs_regmap[];
 extern const u32 vsc7514_qsys_regmap[];
-- 
cgit v1.2.3


From e5150f00721f6f8e7b4e7f31bff86b4b6a8de0d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= <clement.leger@bootlin.com>
Date: Thu, 9 Dec 2021 16:49:08 +0100
Subject: net: ocelot: export ocelot_ifh_port_set() to setup IFH
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

FDMA will need this code to prepare the injection frame header when
sending SKBs. Move this code into ocelot_ifh_port_set() and add
conditional IFH setting for vlan and rew op if they are not set.

Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Clément Léger <clement.leger@bootlin.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/soc/mscc/ocelot.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 33f2e8c9e88b..9b99cfd39a59 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -794,6 +794,7 @@ void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target,
 bool ocelot_can_inject(struct ocelot *ocelot, int grp);
 void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp,
 			      u32 rew_op, struct sk_buff *skb);
+void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag);
 int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb);
 void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp);
 
-- 
cgit v1.2.3


From b471a71e525c73608a6ae5a3fdd2a5d1224da6a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= <clement.leger@bootlin.com>
Date: Thu, 9 Dec 2021 16:49:09 +0100
Subject: net: ocelot: add and export ocelot_ptp_rx_timestamp()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to support PTP in FDMA, PTP handling code is needed. Since
this is the same as for register-based extraction, export it with
a new ocelot_ptp_rx_timestamp() function.

Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Clément Léger <clement.leger@bootlin.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/soc/mscc/ocelot.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 9b99cfd39a59..f038062a97a9 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -797,6 +797,8 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp,
 void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag);
 int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb);
 void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp);
+void ocelot_ptp_rx_timestamp(struct ocelot *ocelot, struct sk_buff *skb,
+			     u64 timestamp);
 
 /* Hardware initialization */
 int ocelot_regfields_init(struct ocelot *ocelot,
-- 
cgit v1.2.3


From 753a026cfec1429c9e32e004ae4d4c2727cc0111 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= <clement.leger@bootlin.com>
Date: Thu, 9 Dec 2021 16:49:11 +0100
Subject: net: ocelot: add FDMA support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ethernet frames can be extracted or injected autonomously to or from
the device’s DDR3/DDR3L memory and/or PCIe memory space. Linked list
data structures in memory are used for injecting or extracting Ethernet
frames. The FDMA generates interrupts when frame extraction or
injection is done and when the linked lists need updating.

The FDMA is shared between all the ethernet ports of the switch and
uses a linked list of descriptors (DCB) to inject and extract packets.
Before adding descriptors, the FDMA channels must be stopped. It would
be inefficient to do that each time a descriptor would be added so the
channels are restarted only once they stopped.

Both channels uses ring-like structure to feed the DCBs to the FDMA.
head and tail are never touched by hardware and are completely handled
by the driver. On top of that, page recycling has been added and is
mostly taken from gianfar driver.

Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Co-developed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Clément Léger <clement.leger@bootlin.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/soc/mscc/ocelot.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index f038062a97a9..3e9454b00562 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -118,6 +118,7 @@ enum ocelot_target {
 	S2,
 	HSIO,
 	PTP,
+	FDMA,
 	GCB,
 	DEV_GMII,
 	TARGET_MAX,
@@ -732,6 +733,8 @@ struct ocelot {
 	/* Protects the PTP clock */
 	spinlock_t			ptp_clock_lock;
 	struct ptp_pin_desc		ptp_pins[OCELOT_PTP_PINS_NUM];
+
+	struct ocelot_fdma		*fdma;
 };
 
 struct ocelot_policer {
-- 
cgit v1.2.3


From bff8c3848e071d387d8b0784dc91fa49cd563774 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 10 Nov 2021 11:01:03 +0100
Subject: bitfield.h: Fix "type of reg too small for mask" test

The test: 'mask > (typeof(_reg))~0ull' only works correctly when both
sides are unsigned, consider:

 - 0xff000000 vs (int)~0ull
 - 0x000000ff vs (int)~0ull

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lore.kernel.org/r/20211110101324.950210584@infradead.org
---
 include/linux/bitfield.h | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
index 4e035aca6f7e..6093fa6db260 100644
--- a/include/linux/bitfield.h
+++ b/include/linux/bitfield.h
@@ -41,6 +41,22 @@
 
 #define __bf_shf(x) (__builtin_ffsll(x) - 1)
 
+#define __scalar_type_to_unsigned_cases(type)				\
+		unsigned type:	(unsigned type)0,			\
+		signed type:	(unsigned type)0
+
+#define __unsigned_scalar_typeof(x) typeof(				\
+		_Generic((x),						\
+			char:	(unsigned char)0,			\
+			__scalar_type_to_unsigned_cases(char),		\
+			__scalar_type_to_unsigned_cases(short),		\
+			__scalar_type_to_unsigned_cases(int),		\
+			__scalar_type_to_unsigned_cases(long),		\
+			__scalar_type_to_unsigned_cases(long long),	\
+			default: (x)))
+
+#define __bf_cast_unsigned(type, x)	((__unsigned_scalar_typeof(type))(x))
+
 #define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx)			\
 	({								\
 		BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask),		\
@@ -49,7 +65,8 @@
 		BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ?		\
 				 ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \
 				 _pfx "value too large for the field"); \
-		BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull,		\
+		BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) >	\
+				 __bf_cast_unsigned(_reg, ~0ull),	\
 				 _pfx "type of reg too small for mask"); \
 		__BUILD_BUG_ON_NOT_POWER_OF_2((_mask) +			\
 					      (1ULL << __bf_shf(_mask))); \
-- 
cgit v1.2.3


From 4121485d271bd730537f613ce041e7ea659606a7 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 9 Dec 2021 21:52:31 +0200
Subject: PCI: Sort Intel Device IDs by value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sort Intel Device IDs by value.

[bhelgaas: lower-case Intel section since we're touching it anyway]
Link: https://lore.kernel.org/r/20211209195231.2785-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Krzysztof Wilczyński <kw@linux.com>
---
 include/linux/pci_ids.h | 50 ++++++++++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 011f2f1ea5bb..0d26ab7eb7dc 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2635,8 +2635,8 @@
 #define PCI_DEVICE_ID_INTEL_PXHD_0	0x0320
 #define PCI_DEVICE_ID_INTEL_PXHD_1	0x0321
 #define PCI_DEVICE_ID_INTEL_PXH_0	0x0329
-#define PCI_DEVICE_ID_INTEL_PXH_1	0x032A
-#define PCI_DEVICE_ID_INTEL_PXHV	0x032C
+#define PCI_DEVICE_ID_INTEL_PXH_1	0x032a
+#define PCI_DEVICE_ID_INTEL_PXHV	0x032c
 #define PCI_DEVICE_ID_INTEL_80332_0	0x0330
 #define PCI_DEVICE_ID_INTEL_80332_1	0x0332
 #define PCI_DEVICE_ID_INTEL_80333_0	0x0370
@@ -2654,14 +2654,14 @@
 #define PCI_DEVICE_ID_INTEL_MFD_SDIO2	0x0822
 #define PCI_DEVICE_ID_INTEL_MFD_EMMC0	0x0823
 #define PCI_DEVICE_ID_INTEL_MFD_EMMC1	0x0824
-#define PCI_DEVICE_ID_INTEL_MRST_SD2	0x084F
-#define PCI_DEVICE_ID_INTEL_QUARK_X1000_ILB	0x095E
+#define PCI_DEVICE_ID_INTEL_MRST_SD2	0x084f
+#define PCI_DEVICE_ID_INTEL_QUARK_X1000_ILB	0x095e
 #define PCI_DEVICE_ID_INTEL_I960	0x0960
 #define PCI_DEVICE_ID_INTEL_I960RM	0x0962
 #define PCI_DEVICE_ID_INTEL_CENTERTON_ILB	0x0c60
 #define PCI_DEVICE_ID_INTEL_8257X_SOL	0x1062
 #define PCI_DEVICE_ID_INTEL_82573E_SOL	0x1085
-#define PCI_DEVICE_ID_INTEL_82573L_SOL	0x108F
+#define PCI_DEVICE_ID_INTEL_82573L_SOL	0x108f
 #define PCI_DEVICE_ID_INTEL_82815_MC	0x1130
 #define PCI_DEVICE_ID_INTEL_82815_CGC	0x1132
 #define PCI_DEVICE_ID_INTEL_82092AA_0	0x1221
@@ -2755,12 +2755,6 @@
 #define PCI_DEVICE_ID_INTEL_82801EB_11	0x24db
 #define PCI_DEVICE_ID_INTEL_82801EB_12	0x24dc
 #define PCI_DEVICE_ID_INTEL_82801EB_13	0x24dd
-#define PCI_DEVICE_ID_INTEL_ESB_1	0x25a1
-#define PCI_DEVICE_ID_INTEL_ESB_2	0x25a2
-#define PCI_DEVICE_ID_INTEL_ESB_4	0x25a4
-#define PCI_DEVICE_ID_INTEL_ESB_5	0x25a6
-#define PCI_DEVICE_ID_INTEL_ESB_9	0x25ab
-#define PCI_DEVICE_ID_INTEL_ESB_10	0x25ac
 #define PCI_DEVICE_ID_INTEL_82820_HB	0x2500
 #define PCI_DEVICE_ID_INTEL_82820_UP_HB	0x2501
 #define PCI_DEVICE_ID_INTEL_82850_HB	0x2530
@@ -2775,14 +2769,15 @@
 #define PCI_DEVICE_ID_INTEL_82915G_IG	0x2582
 #define PCI_DEVICE_ID_INTEL_82915GM_HB	0x2590
 #define PCI_DEVICE_ID_INTEL_82915GM_IG	0x2592
-#define PCI_DEVICE_ID_INTEL_5000_ERR	0x25F0
-#define PCI_DEVICE_ID_INTEL_5000_FBD0	0x25F5
-#define PCI_DEVICE_ID_INTEL_5000_FBD1	0x25F6
-#define PCI_DEVICE_ID_INTEL_82945G_HB	0x2770
-#define PCI_DEVICE_ID_INTEL_82945G_IG	0x2772
-#define PCI_DEVICE_ID_INTEL_3000_HB	0x2778
-#define PCI_DEVICE_ID_INTEL_82945GM_HB	0x27A0
-#define PCI_DEVICE_ID_INTEL_82945GM_IG	0x27A2
+#define PCI_DEVICE_ID_INTEL_ESB_1	0x25a1
+#define PCI_DEVICE_ID_INTEL_ESB_2	0x25a2
+#define PCI_DEVICE_ID_INTEL_ESB_4	0x25a4
+#define PCI_DEVICE_ID_INTEL_ESB_5	0x25a6
+#define PCI_DEVICE_ID_INTEL_ESB_9	0x25ab
+#define PCI_DEVICE_ID_INTEL_ESB_10	0x25ac
+#define PCI_DEVICE_ID_INTEL_5000_ERR	0x25f0
+#define PCI_DEVICE_ID_INTEL_5000_FBD0	0x25f5
+#define PCI_DEVICE_ID_INTEL_5000_FBD1	0x25f6
 #define PCI_DEVICE_ID_INTEL_ICH6_0	0x2640
 #define PCI_DEVICE_ID_INTEL_ICH6_1	0x2641
 #define PCI_DEVICE_ID_INTEL_ICH6_2	0x2642
@@ -2794,6 +2789,11 @@
 #define PCI_DEVICE_ID_INTEL_ESB2_14	0x2698
 #define PCI_DEVICE_ID_INTEL_ESB2_17	0x269b
 #define PCI_DEVICE_ID_INTEL_ESB2_18	0x269e
+#define PCI_DEVICE_ID_INTEL_82945G_HB	0x2770
+#define PCI_DEVICE_ID_INTEL_82945G_IG	0x2772
+#define PCI_DEVICE_ID_INTEL_3000_HB	0x2778
+#define PCI_DEVICE_ID_INTEL_82945GM_HB	0x27a0
+#define PCI_DEVICE_ID_INTEL_82945GM_IG	0x27a2
 #define PCI_DEVICE_ID_INTEL_ICH7_0	0x27b8
 #define PCI_DEVICE_ID_INTEL_ICH7_1	0x27b9
 #define PCI_DEVICE_ID_INTEL_ICH7_30	0x27b0
@@ -2846,7 +2846,7 @@
 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_PHY0    0x2c91
 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR         0x2c98
 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD      0x2c99
-#define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST     0x2c9C
+#define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST     0x2c9c
 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL 0x2ca0
 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR 0x2ca1
 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK 0x2ca2
@@ -2958,16 +2958,16 @@
 #define PCI_DEVICE_ID_INTEL_SBRIDGE_BR		0x3cf5	/* 13.6 */
 #define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1	0x3cf6	/* 12.7 */
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB	0x402f
-#define PCI_DEVICE_ID_INTEL_5100_16	0x65f0
-#define PCI_DEVICE_ID_INTEL_5100_19	0x65f3
-#define PCI_DEVICE_ID_INTEL_5100_21	0x65f5
-#define PCI_DEVICE_ID_INTEL_5100_22	0x65f6
 #define PCI_DEVICE_ID_INTEL_5400_ERR	0x4030
 #define PCI_DEVICE_ID_INTEL_5400_FBD0	0x4035
 #define PCI_DEVICE_ID_INTEL_5400_FBD1	0x4036
-#define PCI_DEVICE_ID_INTEL_IOAT_SCNB	0x65ff
 #define PCI_DEVICE_ID_INTEL_EP80579_0	0x5031
 #define PCI_DEVICE_ID_INTEL_EP80579_1	0x5032
+#define PCI_DEVICE_ID_INTEL_5100_16	0x65f0
+#define PCI_DEVICE_ID_INTEL_5100_19	0x65f3
+#define PCI_DEVICE_ID_INTEL_5100_21	0x65f5
+#define PCI_DEVICE_ID_INTEL_5100_22	0x65f6
+#define PCI_DEVICE_ID_INTEL_IOAT_SCNB	0x65ff
 #define PCI_DEVICE_ID_INTEL_82371SB_0	0x7000
 #define PCI_DEVICE_ID_INTEL_82371SB_1	0x7010
 #define PCI_DEVICE_ID_INTEL_82371SB_2	0x7020
-- 
cgit v1.2.3


From 0f09c274698590d508c43f924d9dffc7130b782d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 3 Dec 2021 09:07:56 +0100
Subject: futex: Fix additional regressions

Naresh reported another architecture that was broken by the same typo that
was already fixed for three architectures: mips also refers to the
futex_atomic_op_inuser_local() function by the wrong name and runs into a
missing closing '}' as well.

Going through the source tree the same typo was found in the documentation
as well as in the xtensa code, both of which ended up escaping the
regression testing so far. In the case of xtensa, it appears that the
broken code path is only used when building for platforms that are not
supported by the default gcc configuration, so they are impossible to test
for with default setups.

After going through these more carefully and fixing up the typos, all
architectures have been build-tested again to ensure that this is now
complete.

Fixes: 4e0d84634445 ("futex: Fix sparc32/m68k/nds32 build regression")
Fixes: 3f2bedabb62c ("futex: Ensure futex_atomic_cmpxchg_inatomic() is present")
Reported-by: Linux Kernel Functional Testing <lkft@linaro.org>
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20211203080823.2938839-1-arnd@kernel.org
---
 include/asm-generic/futex.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index 66d6843bfd02..2a19215baae5 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -21,7 +21,7 @@
 #endif
 
 /**
- * arch_futex_atomic_op_inuser_local() - Atomic arithmetic operation with constant
+ * futex_atomic_op_inuser_local() - Atomic arithmetic operation with constant
  *			  argument and comparison of the previous
  *			  futex value with another constant.
  *
-- 
cgit v1.2.3


From c5fb19937455095573a19ddcbff32e993ed10e35 Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Fri, 10 Dec 2021 22:16:49 +0800
Subject: bpf: Add bpf_strncmp helper

The helper compares two strings: one string is a null-terminated
read-only string, and another string has const max storage size
but doesn't need to be null-terminated. It can be used to compare
file name in tracing or LSM program.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211210141652.877186-2-houtao1@huawei.com
---
 include/linux/bpf.h      |  1 +
 include/uapi/linux/bpf.h | 11 +++++++++++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0ceb54c6342f..7a40022e3d00 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2163,6 +2163,7 @@ extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
 extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
 extern const struct bpf_func_proto bpf_find_vma_proto;
 extern const struct bpf_func_proto bpf_loop_proto;
+extern const struct bpf_func_proto bpf_strncmp_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
   enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c26871263f1f..2820c77e4846 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4983,6 +4983,16 @@ union bpf_attr {
  *	Return
  *		The number of loops performed, **-EINVAL** for invalid **flags**,
  *		**-E2BIG** if **nr_loops** exceeds the maximum number of loops.
+ *
+ * long bpf_strncmp(const char *s1, u32 s1_sz, const char *s2)
+ *	Description
+ *		Do strncmp() between **s1** and **s2**. **s1** doesn't need
+ *		to be null-terminated and **s1_sz** is the maximum storage
+ *		size of **s1**. **s2** must be a read-only string.
+ *	Return
+ *		An integer less than, equal to, or greater than zero
+ *		if the first **s1_sz** bytes of **s1** is found to be
+ *		less than, to match, or be greater than **s2**.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5167,6 +5177,7 @@ union bpf_attr {
 	FN(kallsyms_lookup_name),	\
 	FN(find_vma),			\
 	FN(loop),			\
+	FN(strncmp),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From dc452a471dbae8aca8257c565174212620880093 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 10 Dec 2021 01:34:37 +0200
Subject: net: dsa: introduce tagger-owned storage for private and shared data

Ansuel is working on register access over Ethernet for the qca8k switch
family. This requires the qca8k tagging protocol driver to receive
frames which aren't intended for the network stack, but instead for the
qca8k switch driver itself.

The dp->priv is currently the prevailing method for passing data back
and forth between the tagging protocol driver and the switch driver.
However, this method is riddled with caveats.

The DSA design allows in principle for any switch driver to return any
protocol it desires in ->get_tag_protocol(). The dsa_loop driver can be
modified to do just that. But in the current design, the memory behind
dp->priv has to be allocated by the switch driver, so if the tagging
protocol is paired to an unexpected switch driver, we may end up in NULL
pointer dereferences inside the kernel, or worse (a switch driver may
allocate dp->priv according to the expectations of a different tagger).

The latter possibility is even more plausible considering that DSA
switches can dynamically change tagging protocols in certain cases
(dsa <-> edsa, ocelot <-> ocelot-8021q), and the current design lends
itself to mistakes that are all too easy to make.

This patch proposes that the tagging protocol driver should manage its
own memory, instead of relying on the switch driver to do so.
After analyzing the different in-tree needs, it can be observed that the
required tagger storage is per switch, therefore a ds->tagger_data
pointer is introduced. In principle, per-port storage could also be
introduced, although there is no need for it at the moment. Future
changes will replace the current usage of dp->priv with ds->tagger_data.

We define a "binding" event between the DSA switch tree and the tagging
protocol. During this binding event, the tagging protocol's ->connect()
method is called first, and this may allocate some memory for each
switch of the tree. Then a cross-chip notifier is emitted for the
switches within that tree, and they are given the opportunity to fix up
the tagger's memory (for example, they might set up some function
pointers that represent virtual methods for consuming packets).
Because the memory is owned by the tagger, there exists a ->disconnect()
method for the tagger (which is the place to free the resources), but
there doesn't exist a ->disconnect() method for the switch driver.
This is part of the design. The switch driver should make minimal use of
the public part of the tagger data, and only after type-checking it
using the supplied "proto" argument.

In the code there are in fact two binding events, one is the initial
event in dsa_switch_setup_tag_protocol(). At this stage, the cross chip
notifier chains aren't initialized, so we call each switch's connect()
method by hand. Then there is dsa_tree_bind_tag_proto() during
dsa_tree_change_tag_proto(), and here we have an old protocol and a new
one. We first connect to the new one before disconnecting from the old
one, to simplify error handling a bit and to ensure we remain in a valid
state at all times.

Co-developed-by: Ansuel Smith <ansuelsmth@gmail.com>
Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index bdf308a5c55e..8b496c7e62ef 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -82,12 +82,15 @@ enum dsa_tag_protocol {
 };
 
 struct dsa_switch;
+struct dsa_switch_tree;
 
 struct dsa_device_ops {
 	struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
 	struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
 	void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto,
 			     int *offset);
+	int (*connect)(struct dsa_switch_tree *dst);
+	void (*disconnect)(struct dsa_switch_tree *dst);
 	unsigned int needed_headroom;
 	unsigned int needed_tailroom;
 	const char *name;
@@ -337,6 +340,8 @@ struct dsa_switch {
 	 */
 	void *priv;
 
+	void *tagger_data;
+
 	/*
 	 * Configuration data for this switch.
 	 */
@@ -689,6 +694,13 @@ struct dsa_switch_ops {
 						  enum dsa_tag_protocol mprot);
 	int	(*change_tag_protocol)(struct dsa_switch *ds, int port,
 				       enum dsa_tag_protocol proto);
+	/*
+	 * Method for switch drivers to connect to the tagging protocol driver
+	 * in current use. The switch driver can provide handlers for certain
+	 * types of packets for switch management.
+	 */
+	int	(*connect_tag_protocol)(struct dsa_switch *ds,
+					enum dsa_tag_protocol proto);
 
 	/* Optional switch-wide initialization and destruction methods */
 	int	(*setup)(struct dsa_switch *ds);
-- 
cgit v1.2.3


From 35d976802124303a5b3eb7ec3ed188d568204373 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 10 Dec 2021 01:34:38 +0200
Subject: net: dsa: tag_ocelot: convert to tagger-owned data

The felix driver makes very light use of dp->priv, and the tagger is
effectively stateless. dp->priv is practically only needed to set up a
callback to perform deferred xmit of PTP and STP packets using the
ocelot-8021q tagging protocol (the main ocelot tagging protocol makes no
use of dp->priv, although this driver sets up dp->priv irrespective of
actual tagging protocol in use).

struct felix_port (what used to be pointed to by dp->priv) is removed
and replaced with a two-sided structure. The public side of this
structure, visible to the switch driver, is ocelot_8021q_tagger_data.
The private side is ocelot_8021q_tagger_private, and the latter
structure physically encapsulates the former. The public half of the
tagger data structure can be accessed through a helper of the same name
(ocelot_8021q_tagger_data) which also sanity-checks the protocol
currently in use by the switch. The public/private split was requested
by Andrew Lunn.

Suggested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/ocelot.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h
index 7ee708ad7df2..dca2969015d8 100644
--- a/include/linux/dsa/ocelot.h
+++ b/include/linux/dsa/ocelot.h
@@ -8,6 +8,7 @@
 #include <linux/kthread.h>
 #include <linux/packing.h>
 #include <linux/skbuff.h>
+#include <net/dsa.h>
 
 struct ocelot_skb_cb {
 	struct sk_buff *clone;
@@ -168,11 +169,18 @@ struct felix_deferred_xmit_work {
 	struct kthread_work work;
 };
 
-struct felix_port {
+struct ocelot_8021q_tagger_data {
 	void (*xmit_work_fn)(struct kthread_work *work);
-	struct kthread_worker *xmit_worker;
 };
 
+static inline struct ocelot_8021q_tagger_data *
+ocelot_8021q_tagger_data(struct dsa_switch *ds)
+{
+	BUG_ON(ds->dst->tag_ops->proto != DSA_TAG_PROTO_OCELOT_8021Q);
+
+	return ds->tagger_data;
+}
+
 static inline void ocelot_xfh_get_rew_val(void *extraction, u64 *rew_val)
 {
 	packing(extraction, rew_val, 116, 85, OCELOT_TAG_LEN, UNPACK, 0);
-- 
cgit v1.2.3


From d38049bbe7601f38d598f5da5ff09980483b290a Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 10 Dec 2021 01:34:40 +0200
Subject: net: dsa: sja1105: bring deferred xmit implementation in line with
 ocelot-8021q

When the ocelot-8021q driver was converted to deferred xmit as part of
commit 8d5f7954b7c8 ("net: dsa: felix: break at first CPU port during
init and teardown"), the deferred implementation was deliberately made
subtly different from what sja1105 has.

The implementation differences lied on the following observations:

- There might be a race between these two lines in tag_sja1105.c:

       skb_queue_tail(&sp->xmit_queue, skb_get(skb));
       kthread_queue_work(sp->xmit_worker, &sp->xmit_work);

  and the skb dequeue logic in sja1105_port_deferred_xmit(). For
  example, the xmit_work might be already queued, however the work item
  has just finished walking through the skb queue. Because we don't
  check the return code from kthread_queue_work, we don't do anything if
  the work item is already queued.

  However, nobody will take that skb and send it, at least until the
  next timestampable skb is sent. This creates additional (and
  avoidable) TX timestamping latency.

  To close that race, what the ocelot-8021q driver does is it doesn't
  keep a single work item per port, and a skb timestamping queue, but
  rather dynamically allocates a work item per packet.

- It is also unnecessary to have more than one kthread that does the
  work. So delete the per-port kthread allocations and replace them with
  a single kthread which is global to the switch.

This change brings the two implementations in line by applying those
observations to the sja1105 driver as well.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/sja1105.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index e6c78be40bde..acd9d2afccab 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -37,6 +37,12 @@
 
 #define SJA1105_HWTS_RX_EN			0
 
+struct sja1105_deferred_xmit_work {
+	struct dsa_port *dp;
+	struct sk_buff *skb;
+	struct kthread_work work;
+};
+
 /* Global tagger data: each struct sja1105_port has a reference to
  * the structure defined in struct sja1105_private.
  */
@@ -52,6 +58,8 @@ struct sja1105_tagger_data {
 	 * 2-step TX timestamps
 	 */
 	struct sk_buff_head skb_txtstamp_queue;
+	struct kthread_worker *xmit_worker;
+	void (*xmit_work_fn)(struct kthread_work *work);
 };
 
 struct sja1105_skb_cb {
@@ -65,9 +73,6 @@ struct sja1105_skb_cb {
 	((struct sja1105_skb_cb *)((skb)->cb))
 
 struct sja1105_port {
-	struct kthread_worker *xmit_worker;
-	struct kthread_work xmit_work;
-	struct sk_buff_head xmit_queue;
 	struct sja1105_tagger_data *data;
 	bool hwts_tx_en;
 };
-- 
cgit v1.2.3


From 6f6770ab1ce2b56619264ec6be0b62f05564dcf6 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 10 Dec 2021 01:34:41 +0200
Subject: net: dsa: sja1105: remove hwts_tx_en from tagger data

This tagger property is in fact not used at all by the tagger, only by
the switch driver. Therefore it makes sense to be moved to
sja1105_private.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/sja1105.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index acd9d2afccab..32a8a1344cf6 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -74,7 +74,6 @@ struct sja1105_skb_cb {
 
 struct sja1105_port {
 	struct sja1105_tagger_data *data;
-	bool hwts_tx_en;
 };
 
 /* Timestamps are in units of 8 ns clock ticks (equivalent to
-- 
cgit v1.2.3


From bfcf1425222008e7390c0784b0f3bb7b497fccaa Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 10 Dec 2021 01:34:42 +0200
Subject: net: dsa: sja1105: make dp->priv point directly to
 sja1105_tagger_data

The design of the sja1105 tagger dp->priv is that each port has a
separate struct sja1105_port, and the sp->data pointer points to a
common struct sja1105_tagger_data.

We have removed all per-port members accessible by the tagger, and now
only struct sja1105_tagger_data remains. Make dp->priv point directly to
this.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/sja1105.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index 32a8a1344cf6..1dda9cce85d9 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -43,9 +43,7 @@ struct sja1105_deferred_xmit_work {
 	struct kthread_work work;
 };
 
-/* Global tagger data: each struct sja1105_port has a reference to
- * the structure defined in struct sja1105_private.
- */
+/* Global tagger data */
 struct sja1105_tagger_data {
 	struct sk_buff *stampable_skb;
 	/* Protects concurrent access to the meta state machine
@@ -72,10 +70,6 @@ struct sja1105_skb_cb {
 #define SJA1105_SKB_CB(skb) \
 	((struct sja1105_skb_cb *)((skb)->cb))
 
-struct sja1105_port {
-	struct sja1105_tagger_data *data;
-};
-
 /* Timestamps are in units of 8 ns clock ticks (equivalent to
  * a fixed 125 MHz clock).
  */
-- 
cgit v1.2.3


From 22ee9f8e4011fb8a6d75dc2f9a43360d4f400235 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 10 Dec 2021 01:34:43 +0200
Subject: net: dsa: sja1105: move ts_id from sja1105_tagger_data

The TX timestamp ID is incremented by the SJA1110 PTP timestamping
callback (->port_tx_timestamp) for every packet, when cloning it.
It isn't used by the tagger at all, even though it sits inside the
struct sja1105_tagger_data.

Also, serialization to this structure is currently done through
tagger_data->meta_lock, which is a cheap hack because the meta_lock
isn't used for anything else on SJA1110 (sja1105_rcv_meta_state_machine
isn't called).

This change moves ts_id from sja1105_tagger_data to sja1105_private and
introduces a dedicated spinlock for it, also in sja1105_private.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/sja1105.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index 1dda9cce85d9..d8ee53085c09 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -51,7 +51,6 @@ struct sja1105_tagger_data {
 	 */
 	spinlock_t meta_lock;
 	unsigned long state;
-	u8 ts_id;
 	/* Used on SJA1110 where meta frames are generated only for
 	 * 2-step TX timestamps
 	 */
-- 
cgit v1.2.3


From c79e84866d2ac637fce921a28288f214e91d662b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 10 Dec 2021 01:34:44 +0200
Subject: net: dsa: tag_sja1105: convert to tagger-owned data

Currently, struct sja1105_tagger_data is a part of struct
sja1105_private, and is used by the sja1105 driver to populate dp->priv.

With the movement towards tagger-owned storage, the sja1105 driver
should not be the owner of this memory.

This change implements the connection between the sja1105 switch driver
and its tagging protocol, which means that sja1105_tagger_data no longer
stays in dp->priv but in ds->tagger_data, and that the sja1105 driver
now only populates the sja1105_port_deferred_xmit callback pointer.
The kthread worker is now the responsibility of the tagger.

The sja1105 driver also alters the tagger's state some more, especially
with regard to the PTP RX timestamping state. This will be fixed up a
bit in further changes.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/sja1105.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index d8ee53085c09..9f7d42cbbc08 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -84,9 +84,12 @@ static inline s64 sja1105_ticks_to_ns(s64 ticks)
 	return ticks * SJA1105_TICK_NS;
 }
 
-static inline bool dsa_port_is_sja1105(struct dsa_port *dp)
+static inline struct sja1105_tagger_data *
+sja1105_tagger_data(struct dsa_switch *ds)
 {
-	return true;
+	BUG_ON(ds->dst->tag_ops->proto != DSA_TAG_PROTO_SJA1105);
+
+	return ds->tagger_data;
 }
 
 #endif /* _NET_DSA_SJA1105_H */
-- 
cgit v1.2.3


From fcbf979a5b4b5784bfb5647ae6190cd5c2ae595d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 10 Dec 2021 01:34:45 +0200
Subject: Revert "net: dsa: move sja1110_process_meta_tstamp inside the tagging
 protocol driver"

This reverts commit 6d709cadfde68dbd12bef12fcced6222226dcb06.

The above change was done to avoid calling symbols exported by the
switch driver from the tagging protocol driver.

With the tagger-owned storage model, we have a new option on our hands,
and that is for the switch driver to provide a data consumer handler in
the form of a function pointer inside the ->connect_tag_protocol()
method. Having a function pointer avoids the problems of the exported
symbols approach.

By creating a handler for metadata frames holding TX timestamps on
SJA1110, we are able to eliminate an skb queue from the tagger data, and
replace it with a simple, and stateless, function pointer. This skb
queue is now handled exclusively by sja1105_ptp.c, which makes the code
easier to follow, as it used to be before the reverted patch.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/sja1105.h | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index 9f7d42cbbc08..d216211b64f8 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -37,6 +37,11 @@
 
 #define SJA1105_HWTS_RX_EN			0
 
+enum sja1110_meta_tstamp {
+	SJA1110_META_TSTAMP_TX = 0,
+	SJA1110_META_TSTAMP_RX = 1,
+};
+
 struct sja1105_deferred_xmit_work {
 	struct dsa_port *dp;
 	struct sk_buff *skb;
@@ -51,12 +56,10 @@ struct sja1105_tagger_data {
 	 */
 	spinlock_t meta_lock;
 	unsigned long state;
-	/* Used on SJA1110 where meta frames are generated only for
-	 * 2-step TX timestamps
-	 */
-	struct sk_buff_head skb_txtstamp_queue;
 	struct kthread_worker *xmit_worker;
 	void (*xmit_work_fn)(struct kthread_work *work);
+	void (*meta_tstamp_handler)(struct dsa_switch *ds, int port, u8 ts_id,
+				    enum sja1110_meta_tstamp dir, u64 tstamp);
 };
 
 struct sja1105_skb_cb {
@@ -69,21 +72,6 @@ struct sja1105_skb_cb {
 #define SJA1105_SKB_CB(skb) \
 	((struct sja1105_skb_cb *)((skb)->cb))
 
-/* Timestamps are in units of 8 ns clock ticks (equivalent to
- * a fixed 125 MHz clock).
- */
-#define SJA1105_TICK_NS			8
-
-static inline s64 ns_to_sja1105_ticks(s64 ns)
-{
-	return ns / SJA1105_TICK_NS;
-}
-
-static inline s64 sja1105_ticks_to_ns(s64 ticks)
-{
-	return ticks * SJA1105_TICK_NS;
-}
-
 static inline struct sja1105_tagger_data *
 sja1105_tagger_data(struct dsa_switch *ds)
 {
-- 
cgit v1.2.3


From 950a419d9de13668f86828394cb242a1f9dece74 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 10 Dec 2021 01:34:46 +0200
Subject: net: dsa: tag_sja1105: split sja1105_tagger_data into private and
 public sections

The sja1105 driver messes with the tagging protocol's state when PTP RX
timestamping is enabled/disabled. This is fundamentally necessary
because the tagger needs to know what to do when it receives a PTP
packet. If RX timestamping is enabled, then a metadata follow-up frame
is expected, and this holds the (partial) timestamp. So the tagger plays
hide-and-seek with the network stack until it also gets the metadata
frame, and then presents a single packet, the timestamped PTP packet.
But when RX timestamping isn't enabled, there is no metadata frame
expected, so the hide-and-seek game must be turned off and the packet
must be delivered right away to the network stack.

Considering this, we create a pseudo isolation by devising two tagger
methods callable by the switch: one to get the RX timestamping state,
and one to set it. Since we can't export symbols between the tagger and
the switch driver, these methods are exposed through function pointers.

After this change, the public portion of the sja1105_tagger_data
contains only function pointers.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/sja1105.h | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index d216211b64f8..e9cb1ae6d742 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -35,8 +35,6 @@
 #define SJA1105_META_SMAC			0x222222222222ull
 #define SJA1105_META_DMAC			0x0180C200000Eull
 
-#define SJA1105_HWTS_RX_EN			0
-
 enum sja1110_meta_tstamp {
 	SJA1110_META_TSTAMP_TX = 0,
 	SJA1110_META_TSTAMP_RX = 1,
@@ -50,16 +48,13 @@ struct sja1105_deferred_xmit_work {
 
 /* Global tagger data */
 struct sja1105_tagger_data {
-	struct sk_buff *stampable_skb;
-	/* Protects concurrent access to the meta state machine
-	 * from taggers running on multiple ports on SMP systems
-	 */
-	spinlock_t meta_lock;
-	unsigned long state;
-	struct kthread_worker *xmit_worker;
+	/* Tagger to switch */
 	void (*xmit_work_fn)(struct kthread_work *work);
 	void (*meta_tstamp_handler)(struct dsa_switch *ds, int port, u8 ts_id,
 				    enum sja1110_meta_tstamp dir, u64 tstamp);
+	/* Switch to tagger */
+	bool (*rxtstamp_get_state)(struct dsa_switch *ds);
+	void (*rxtstamp_set_state)(struct dsa_switch *ds, bool on);
 };
 
 struct sja1105_skb_cb {
-- 
cgit v1.2.3


From 4f3cb34364e2552ce5f3a8ca33a184c13c6152a5 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 10 Dec 2021 01:34:47 +0200
Subject: net: dsa: remove dp->priv

All current in-tree uses of dp->priv have been replaced with
ds->tagger_data, which provides for a safer API especially when the
connection isn't the regular 1:1 link between one switch driver and one
tagging protocol driver, but could be either one switch to many taggers,
or many switches to one tagger.

Therefore, we can remove this unused pointer.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 8b496c7e62ef..64d71968aa91 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -276,12 +276,6 @@ struct dsa_port {
 
 	struct list_head list;
 
-	/*
-	 * Give the switch driver somewhere to hang its per-port private data
-	 * structures (accessible from the tagger).
-	 */
-	void *priv;
-
 	/*
 	 * Original copy of the master netdev ethtool_ops
 	 */
-- 
cgit v1.2.3


From 3cf3cdea6fe3fdb7a1e4ac1372b80408e4f56b73 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <demonsingur@gmail.com>
Date: Sun, 5 Dec 2021 13:40:44 +0200
Subject: dt-bindings: iio: add AD74413R

The AD74412R and AD74413R are quad-channel, software configurable,
input/output solutions for building and process control applications.

They contain functionality for analog output, analog input, digital input,
resistance temperature detector, and thermocouple measurements integrated
into a single chip solution with an SPI interface.

The devices feature a 16-bit ADC and four configurable 13-bit DACs to
provide four configurable input/output channels and a suite of diagnostic
functions.

The AD74413R differentiates itself from the AD74412R by being
HART-compatible.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20211205114045.173612-3-cosmin.tanislav@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/dt-bindings/iio/addac/adi,ad74413r.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 include/dt-bindings/iio/addac/adi,ad74413r.h

(limited to 'include')

diff --git a/include/dt-bindings/iio/addac/adi,ad74413r.h b/include/dt-bindings/iio/addac/adi,ad74413r.h
new file mode 100644
index 000000000000..204f92bbd79f
--- /dev/null
+++ b/include/dt-bindings/iio/addac/adi,ad74413r.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _DT_BINDINGS_ADI_AD74413R_H
+#define _DT_BINDINGS_ADI_AD74413R_H
+
+#define CH_FUNC_HIGH_IMPEDANCE			0x0
+#define CH_FUNC_VOLTAGE_OUTPUT			0x1
+#define CH_FUNC_CURRENT_OUTPUT			0x2
+#define CH_FUNC_VOLTAGE_INPUT			0x3
+#define CH_FUNC_CURRENT_INPUT_EXT_POWER		0x4
+#define CH_FUNC_CURRENT_INPUT_LOOP_POWER	0x5
+#define CH_FUNC_RESISTANCE_INPUT		0x6
+#define CH_FUNC_DIGITAL_INPUT_LOGIC		0x7
+#define CH_FUNC_DIGITAL_INPUT_LOOP_POWER	0x8
+#define CH_FUNC_CURRENT_INPUT_EXT_POWER_HART	0x9
+#define CH_FUNC_CURRENT_INPUT_LOOP_POWER_HART	0xA
+
+#define CH_FUNC_MIN	CH_FUNC_HIGH_IMPEDANCE
+#define CH_FUNC_MAX	CH_FUNC_CURRENT_INPUT_LOOP_POWER_HART
+
+#endif /* _DT_BINDINGS_ADI_AD74413R_H */
-- 
cgit v1.2.3


From 9020ef659885f2622cfb386cc229b6d618362895 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 17 Oct 2021 18:22:09 +0100
Subject: iio: trigger: Fix a scheduling whilst atomic issue seen on tsc2046

IIO triggers are software IRQ chips that split an incoming IRQ into
separate IRQs routed to all devices using the trigger.
When all consumers are done then a trigger callback reenable() is
called.  There are a few circumstances under which this can happen
in atomic context.

1) A single user of the trigger that calls the iio_trigger_done()
function from interrupt context.
2) A race between disconnecting the last device from a trigger and
the trigger itself sucessfully being disabled.

To avoid a resulting scheduling whilst atomic, close this second corner
by using schedule_work() to ensure the reenable is not done in atomic
context.

Note that drivers must be careful to manage the interaction of
set_state() and reenable() callbacks to ensure appropriate reference
counting if they are relying on the same hardware controls.

Deliberately taking this the slow path rather than via a fixes tree
because the error has hard to hit and I would like it to soak for a while
before hitting a release kernel.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Pengutronix Kernel Team <kernel@pengutronix.de>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Tested-by: Oleksij Rempel <o.rempel@pengutronix.de>
Cc: <Stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20211017172209.112387-1-jic23@kernel.org
---
 include/linux/iio/trigger.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h
index 096f68dd2e0c..4c69b144677b 100644
--- a/include/linux/iio/trigger.h
+++ b/include/linux/iio/trigger.h
@@ -55,6 +55,7 @@ struct iio_trigger_ops {
  * @attached_own_device:[INTERN] if we are using our own device as trigger,
  *			i.e. if we registered a poll function to the same
  *			device as the one providing the trigger.
+ * @reenable_work:	[INTERN] work item used to ensure reenable can sleep.
  **/
 struct iio_trigger {
 	const struct iio_trigger_ops	*ops;
@@ -74,6 +75,7 @@ struct iio_trigger {
 	unsigned long pool[BITS_TO_LONGS(CONFIG_IIO_CONSUMERS_PER_TRIGGER)];
 	struct mutex			pool_lock;
 	bool				attached_own_device;
+	struct work_struct		reenable_work;
 };
 
 
-- 
cgit v1.2.3


From 3ac27afefd5dd6a53e830542b899f092a58b6b51 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 5 Dec 2021 17:01:29 +0000
Subject: iio:dac:ad5755: Switch to generic firmware properties and drop pdata

Lars pointed out that platform data can also be supported via the
generic properties interface, so there is no point in continuing to
support it separately.  Hence squish the linux/platform_data/ad5755.h
header into the c file and drop accessing the platform data directly.

Done by inspection only.  Mostly completely mechanical with the
exception of a few places where default value handling is
cleaner done by first setting the value, then calling the
firmware reading function but and not checking the return value,
as opposed to reading firmware then setting the default if an error
occurs.

Part of general attempt to move all of IIO over to generic
device properties, both to enable other firmware types and
to remove drivers that can be the source of of_ specific
behaviour in new drivers.

Suggested-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
---
 include/linux/platform_data/ad5755.h | 102 -----------------------------------
 1 file changed, 102 deletions(-)
 delete mode 100644 include/linux/platform_data/ad5755.h

(limited to 'include')

diff --git a/include/linux/platform_data/ad5755.h b/include/linux/platform_data/ad5755.h
deleted file mode 100644
index e371e08f04bc..000000000000
--- a/include/linux/platform_data/ad5755.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright 2012 Analog Devices Inc.
- */
-#ifndef __LINUX_PLATFORM_DATA_AD5755_H__
-#define __LINUX_PLATFORM_DATA_AD5755_H__
-
-enum ad5755_mode {
-	AD5755_MODE_VOLTAGE_0V_5V		= 0,
-	AD5755_MODE_VOLTAGE_0V_10V		= 1,
-	AD5755_MODE_VOLTAGE_PLUSMINUS_5V	= 2,
-	AD5755_MODE_VOLTAGE_PLUSMINUS_10V	= 3,
-	AD5755_MODE_CURRENT_4mA_20mA		= 4,
-	AD5755_MODE_CURRENT_0mA_20mA		= 5,
-	AD5755_MODE_CURRENT_0mA_24mA		= 6,
-};
-
-enum ad5755_dc_dc_phase {
-	AD5755_DC_DC_PHASE_ALL_SAME_EDGE		= 0,
-	AD5755_DC_DC_PHASE_A_B_SAME_EDGE_C_D_OPP_EDGE	= 1,
-	AD5755_DC_DC_PHASE_A_C_SAME_EDGE_B_D_OPP_EDGE	= 2,
-	AD5755_DC_DC_PHASE_90_DEGREE			= 3,
-};
-
-enum ad5755_dc_dc_freq {
-	AD5755_DC_DC_FREQ_250kHZ = 0,
-	AD5755_DC_DC_FREQ_410kHZ = 1,
-	AD5755_DC_DC_FREQ_650kHZ = 2,
-};
-
-enum ad5755_dc_dc_maxv {
-	AD5755_DC_DC_MAXV_23V	= 0,
-	AD5755_DC_DC_MAXV_24V5	= 1,
-	AD5755_DC_DC_MAXV_27V	= 2,
-	AD5755_DC_DC_MAXV_29V5	= 3,
-};
-
-enum ad5755_slew_rate {
-	AD5755_SLEW_RATE_64k	= 0,
-	AD5755_SLEW_RATE_32k	= 1,
-	AD5755_SLEW_RATE_16k	= 2,
-	AD5755_SLEW_RATE_8k	= 3,
-	AD5755_SLEW_RATE_4k	= 4,
-	AD5755_SLEW_RATE_2k	= 5,
-	AD5755_SLEW_RATE_1k	= 6,
-	AD5755_SLEW_RATE_500	= 7,
-	AD5755_SLEW_RATE_250	= 8,
-	AD5755_SLEW_RATE_125	= 9,
-	AD5755_SLEW_RATE_64	= 10,
-	AD5755_SLEW_RATE_32	= 11,
-	AD5755_SLEW_RATE_16	= 12,
-	AD5755_SLEW_RATE_8	= 13,
-	AD5755_SLEW_RATE_4	= 14,
-	AD5755_SLEW_RATE_0_5	= 15,
-};
-
-enum ad5755_slew_step_size {
-	AD5755_SLEW_STEP_SIZE_1 = 0,
-	AD5755_SLEW_STEP_SIZE_2 = 1,
-	AD5755_SLEW_STEP_SIZE_4 = 2,
-	AD5755_SLEW_STEP_SIZE_8 = 3,
-	AD5755_SLEW_STEP_SIZE_16 = 4,
-	AD5755_SLEW_STEP_SIZE_32 = 5,
-	AD5755_SLEW_STEP_SIZE_64 = 6,
-	AD5755_SLEW_STEP_SIZE_128 = 7,
-	AD5755_SLEW_STEP_SIZE_256 = 8,
-};
-
-/**
- * struct ad5755_platform_data - AD5755 DAC driver platform data
- * @ext_dc_dc_compenstation_resistor: Whether an external DC-DC converter
- * compensation register is used.
- * @dc_dc_phase: DC-DC converter phase.
- * @dc_dc_freq: DC-DC converter frequency.
- * @dc_dc_maxv: DC-DC maximum allowed boost voltage.
- * @dac.mode: The mode to be used for the DAC output.
- * @dac.ext_current_sense_resistor: Whether an external current sense resistor
- * is used.
- * @dac.enable_voltage_overrange: Whether to enable 20% voltage output overrange.
- * @dac.slew.enable: Whether to enable digital slew.
- * @dac.slew.rate: Slew rate of the digital slew.
- * @dac.slew.step_size: Slew step size of the digital slew.
- **/
-struct ad5755_platform_data {
-	bool ext_dc_dc_compenstation_resistor;
-	enum ad5755_dc_dc_phase dc_dc_phase;
-	enum ad5755_dc_dc_freq dc_dc_freq;
-	enum ad5755_dc_dc_maxv dc_dc_maxv;
-
-	struct {
-		enum ad5755_mode mode;
-		bool ext_current_sense_resistor;
-		bool enable_voltage_overrange;
-		struct {
-			bool enable;
-			enum ad5755_slew_rate rate;
-			enum ad5755_slew_step_size step_size;
-		} slew;
-	} dac[4];
-};
-
-#endif
-- 
cgit v1.2.3


From fb6723daf89083a0d2290f3a0abc777e40766c84 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Sat, 29 May 2021 12:33:53 +0900
Subject: ALSA: pcm: comment about relation between msbits hw parameter and
 [S|U]32 formats

Regarding to handling [U|S][32|24] PCM formats, many userspace
application developers and driver developers have confusion, since they
require them to understand justification or padding. It easily
loses consistency and soundness to operate with many type of devices. In
this commit, I attempt to solve the situation by adding comment about
relation between [S|U]32 formats and 'msbits' hardware parameter.

The formats are used for 'left-justified' sample format, and the available
bit count in most significant bit is delivered to userspace in msbits
hardware parameter (struct snd_pcm_hw_params.msbits), which is decided by
msbits constraint added by pcm drivers (snd_pcm_hw_constraint_msbits()).

In driver side, the msbits constraint includes two elements; the physical
width of format and the available width of the format in most significant
bit. The former is used to match SAMPLE_BITS of format. (For my
convenience, I ignore wildcard in the usage of the constraint.)

As a result of interaction between ALSA pcm core and ALSA pcm application,
when the format in which SAMPLE_BITS equals to physical width of the
msbits constaint, the msbits parameter is set by referring to the
available width of the constraint. When the msbits parameter is not
changed in the above process, ALSA pcm core set it alternatively with
SAMPLE_BIT of chosen format.

In userspace application side, the msbits is only available after calling
ioctl(2) with SNDRV_PCM_IOCTL_HW_PARAMS request. Even if the hardware
parameter structure includes somewhat value of SAMPLE_BITS interval
parameter as width of format, all of the width is not always available
since msbits can be less than the width.

I note that [S|U]24 formats are used for 'right-justified' 24 bit sample
formats within 32 bit frame. The first byte in most significant bit
should be invalidated. Although the msbits exposed to userspace should be
zero as invalid value, actually it is 32 from physical width of format.

[ corrected typos -- tiwai ]

Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20210529033353.21641-1-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h         | 3 +++
 include/uapi/sound/asound.h | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 33451f8ff755..9b187d86e1bd 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -147,6 +147,9 @@ struct snd_pcm_ops {
 #define SNDRV_PCM_FMTBIT_S24_BE		_SNDRV_PCM_FMTBIT(S24_BE)
 #define SNDRV_PCM_FMTBIT_U24_LE		_SNDRV_PCM_FMTBIT(U24_LE)
 #define SNDRV_PCM_FMTBIT_U24_BE		_SNDRV_PCM_FMTBIT(U24_BE)
+// For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the
+// available bit count in most significant bit. It's for the case of so-called 'left-justified' or
+// `right-padding` sample which has less width than 32 bit.
 #define SNDRV_PCM_FMTBIT_S32_LE		_SNDRV_PCM_FMTBIT(S32_LE)
 #define SNDRV_PCM_FMTBIT_S32_BE		_SNDRV_PCM_FMTBIT(S32_BE)
 #define SNDRV_PCM_FMTBIT_U32_LE		_SNDRV_PCM_FMTBIT(U32_LE)
diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index 5fbb79e30819..cf1d20e34167 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -202,6 +202,9 @@ typedef int __bitwise snd_pcm_format_t;
 #define	SNDRV_PCM_FORMAT_S24_BE	((__force snd_pcm_format_t) 7) /* low three bytes */
 #define	SNDRV_PCM_FORMAT_U24_LE	((__force snd_pcm_format_t) 8) /* low three bytes */
 #define	SNDRV_PCM_FORMAT_U24_BE	((__force snd_pcm_format_t) 9) /* low three bytes */
+// For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the
+// available bit count in most significant bit. It's for the case of so-called 'left-justified' or
+// `right-padding` sample which has less width than 32 bit.
 #define	SNDRV_PCM_FORMAT_S32_LE	((__force snd_pcm_format_t) 10)
 #define	SNDRV_PCM_FORMAT_S32_BE	((__force snd_pcm_format_t) 11)
 #define	SNDRV_PCM_FORMAT_U32_LE	((__force snd_pcm_format_t) 12)
-- 
cgit v1.2.3


From 55b71f6c29f2a78af42dd453dfed895eba516cb4 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Mon, 13 Dec 2021 17:12:57 +0900
Subject: ALSA: uapi: use C90 comment style instead of C99 style

UAPI headers are built with compiler option for C90, thus double-slashes
comment introduced in C99 is not preferable.

Fixes: fb6723daf890 ("ALSA: pcm: comment about relation between msbits hw parameter and [S|U]32 formats")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20211213081257.36097-1-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/uapi/sound/asound.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index cf1d20e34167..1834f58b8ede 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -202,9 +202,11 @@ typedef int __bitwise snd_pcm_format_t;
 #define	SNDRV_PCM_FORMAT_S24_BE	((__force snd_pcm_format_t) 7) /* low three bytes */
 #define	SNDRV_PCM_FORMAT_U24_LE	((__force snd_pcm_format_t) 8) /* low three bytes */
 #define	SNDRV_PCM_FORMAT_U24_BE	((__force snd_pcm_format_t) 9) /* low three bytes */
-// For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the
-// available bit count in most significant bit. It's for the case of so-called 'left-justified' or
-// `right-padding` sample which has less width than 32 bit.
+/*
+ * For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the
+ * available bit count in most significant bit. It's for the case of so-called 'left-justified' or
+ * `right-padding` sample which has less width than 32 bit.
+ */
 #define	SNDRV_PCM_FORMAT_S32_LE	((__force snd_pcm_format_t) 10)
 #define	SNDRV_PCM_FORMAT_S32_BE	((__force snd_pcm_format_t) 11)
 #define	SNDRV_PCM_FORMAT_U32_LE	((__force snd_pcm_format_t) 12)
-- 
cgit v1.2.3


From 9d9bcae47fd5a0b827521f65ab7d10a218eacc37 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 3 Dec 2021 11:28:44 +0100
Subject: ACPI: delay enumeration of devices with a _DEP pointing to an INT3472
 device

The clk and regulator frameworks expect clk/regulator consumer-devices
to have info about the consumed clks/regulators described in the device's
fw_node.

To work around cases where this info is not present in the firmware tables,
which is often the case on x86/ACPI devices, both frameworks allow the
provider-driver to attach info about consumers to the clks/regulators
when registering these.

This causes problems with the probe ordering wrt drivers for consumers
of these clks/regulators. Since the lookups are only registered when the
provider-driver binds, trying to get these clks/regulators before then
results in a -ENOENT error for clks and a dummy regulator for regulators.

One case where we hit this issue is camera sensors such as e.g. the OV8865
sensor found on the Microsoft Surface Go. The sensor uses clks, regulators
and GPIOs provided by a TPS68470 PMIC which is described in an INT3472
ACPI device. There is special platform code handling this and setting
platform_data with the necessary consumer info on the MFD cells
instantiated for the PMIC under: drivers/platform/x86/intel/int3472.

For this to work properly the ov8865 driver must not bind to the I2C-client
for the OV8865 sensor until after the TPS68470 PMIC gpio, regulator and
clk MFD cells have all been fully setup.

The OV8865 on the Microsoft Surface Go is just one example, all X86
devices using the Intel IPU3 camera block found on recent Intel SoCs
have similar issues where there is an INT3472 HID ACPI-device, which
describes the clks and regulators, and the driver for this INT3472 device
must be fully initialized before the sensor driver (any sensor driver)
binds for things to work properly.

On these devices the ACPI nodes describing the sensors all have a _DEP
dependency on the matching INT3472 ACPI device (there is one per sensor).

This allows solving the probe-ordering problem by delaying the enumeration
(instantiation of the I2C-client in the ov8865 example) of ACPI-devices
which have a _DEP dependency on an INT3472 device.

The new acpi_dev_ready_for_enumeration() helper used for this is also
exported because for devices, which have the enumeration_by_parent flag
set, the parent-driver will do its own scan of child ACPI devices and
it will try to enumerate those during its probe(). Code doing this such
as e.g. the i2c-core-acpi.c code must call this new helper to ensure
that it too delays the enumeration until all the _DEP dependencies are
met on devices which have the new honor_deps flag set.

Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20211203102857.44539-2-hdegoede@redhat.com
---
 include/acpi/acpi_bus.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 480f9207a4c6..2f93ecf05dac 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -202,7 +202,8 @@ struct acpi_device_flags {
 	u32 coherent_dma:1;
 	u32 cca_seen:1;
 	u32 enumeration_by_parent:1;
-	u32 reserved:19;
+	u32 honor_deps:1;
+	u32 reserved:18;
 };
 
 /* File System */
@@ -285,6 +286,7 @@ struct acpi_dep_data {
 	struct list_head node;
 	acpi_handle supplier;
 	acpi_handle consumer;
+	bool honor_dep;
 };
 
 /* Performance Management */
@@ -693,6 +695,7 @@ static inline bool acpi_device_can_poweroff(struct acpi_device *adev)
 bool acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *uid2);
 
 void acpi_dev_clear_dependencies(struct acpi_device *supplier);
+bool acpi_dev_ready_for_enumeration(const struct acpi_device *device);
 struct acpi_device *acpi_dev_get_first_consumer_dev(struct acpi_device *supplier);
 struct acpi_device *
 acpi_dev_get_next_match_dev(struct acpi_device *adev, const char *hid, const char *uid, s64 hrv);
-- 
cgit v1.2.3


From c537be0bfad6337f2afd618fe252c03217191405 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 3 Dec 2021 11:28:46 +0100
Subject: i2c: acpi: Add i2c_acpi_new_device_by_fwnode() function

Change i2c_acpi_new_device() into i2c_acpi_new_device_by_fwnode() and
add a static inline wrapper providing the old i2c_acpi_new_device()
behavior.

This is necessary because in some cases we may only have access
to the fwnode / acpi_device and not to the matching physical-node
struct device *.

Suggested-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Wolfram Sang <wsa@kernel.org>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20211203102857.44539-4-hdegoede@redhat.com
---
 include/linux/i2c.h | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 16119ac1aa97..7d4f52ceb7b5 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -1025,8 +1025,9 @@ bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares,
 			       struct acpi_resource_i2c_serialbus **i2c);
 int i2c_acpi_client_count(struct acpi_device *adev);
 u32 i2c_acpi_find_bus_speed(struct device *dev);
-struct i2c_client *i2c_acpi_new_device(struct device *dev, int index,
-				       struct i2c_board_info *info);
+struct i2c_client *i2c_acpi_new_device_by_fwnode(struct fwnode_handle *fwnode,
+						 int index,
+						 struct i2c_board_info *info);
 struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle);
 bool i2c_acpi_waive_d0_probe(struct device *dev);
 #else
@@ -1043,8 +1044,9 @@ static inline u32 i2c_acpi_find_bus_speed(struct device *dev)
 {
 	return 0;
 }
-static inline struct i2c_client *i2c_acpi_new_device(struct device *dev,
-					int index, struct i2c_board_info *info)
+static inline struct i2c_client *i2c_acpi_new_device_by_fwnode(
+					struct fwnode_handle *fwnode, int index,
+					struct i2c_board_info *info)
 {
 	return ERR_PTR(-ENODEV);
 }
@@ -1058,4 +1060,11 @@ static inline bool i2c_acpi_waive_d0_probe(struct device *dev)
 }
 #endif /* CONFIG_ACPI */
 
+static inline struct i2c_client *i2c_acpi_new_device(struct device *dev,
+						     int index,
+						     struct i2c_board_info *info)
+{
+	return i2c_acpi_new_device_by_fwnode(dev_fwnode(dev), index, info);
+}
+
 #endif /* _LINUX_I2C_H */
-- 
cgit v1.2.3


From 9dfa374cc6d04d2515adc21c39e356b64ee45a29 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 3 Dec 2021 11:28:47 +0100
Subject: platform_data: Add linux/platform_data/tps68470.h file

The clk and regulator frameworks expect clk/regulator consumer-devices
to have info about the consumed clks/regulators described in the device's
fw_node.

To work around cases where this info is not present in the firmware tables,
which is often the case on x86/ACPI devices, both frameworks allow the
provider-driver to attach info about consumers to the provider-device
during probe/registration of the provider device.

The TI TPS68470 PMIC is used x86/ACPI devices with the consumer-info
missing from the ACPI tables. Thus the tps68470-clk and tps68470-regulator
drivers must provide the consumer-info at probe time.

Define tps68470_clk_platform_data and tps68470_regulator_platform_data
structs to allow the x86 platform code to pass the necessary consumer info
to these drivers.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20211203102857.44539-5-hdegoede@redhat.com
---
 include/linux/platform_data/tps68470.h | 35 ++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 include/linux/platform_data/tps68470.h

(limited to 'include')

diff --git a/include/linux/platform_data/tps68470.h b/include/linux/platform_data/tps68470.h
new file mode 100644
index 000000000000..126d082c3f2e
--- /dev/null
+++ b/include/linux/platform_data/tps68470.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * TI TPS68470 PMIC platform data definition.
+ *
+ * Copyright (c) 2021 Red Hat Inc.
+ *
+ * Red Hat authors:
+ * Hans de Goede <hdegoede@redhat.com>
+ */
+#ifndef __PDATA_TPS68470_H
+#define __PDATA_TPS68470_H
+
+enum tps68470_regulators {
+	TPS68470_CORE,
+	TPS68470_ANA,
+	TPS68470_VCM,
+	TPS68470_VIO,
+	TPS68470_VSIO,
+	TPS68470_AUX1,
+	TPS68470_AUX2,
+	TPS68470_NUM_REGULATORS
+};
+
+struct regulator_init_data;
+
+struct tps68470_regulator_platform_data {
+	const struct regulator_init_data *reg_init_data[TPS68470_NUM_REGULATORS];
+};
+
+struct tps68470_clk_platform_data {
+	const char *consumer_dev_name;
+	const char *consumer_con_id;
+};
+
+#endif
-- 
cgit v1.2.3


From e522ae91b8ff7bf89d22d9322308aba1a6326996 Mon Sep 17 00:00:00 2001
From: Sam Protsenko <semen.protsenko@linaro.org>
Date: Sat, 4 Dec 2021 21:57:53 +0200
Subject: dt-bindings: soc: samsung: Add Exynos USI bindings

Add constants for choosing USIv2 configuration mode in device tree.
Those are further used in USI driver to figure out which value to write
into SW_CONF register. Also document USIv2 IP-core bindings.

Signed-off-by: Sam Protsenko <semen.protsenko@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20211204195757.8600-2-semen.protsenko@linaro.org
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
---
 include/dt-bindings/soc/samsung,exynos-usi.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 include/dt-bindings/soc/samsung,exynos-usi.h

(limited to 'include')

diff --git a/include/dt-bindings/soc/samsung,exynos-usi.h b/include/dt-bindings/soc/samsung,exynos-usi.h
new file mode 100644
index 000000000000..a01af169d249
--- /dev/null
+++ b/include/dt-bindings/soc/samsung,exynos-usi.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2021 Linaro Ltd.
+ * Author: Sam Protsenko <semen.protsenko@linaro.org>
+ *
+ * Device Tree bindings for Samsung Exynos USI (Universal Serial Interface).
+ */
+
+#ifndef __DT_BINDINGS_SAMSUNG_EXYNOS_USI_H
+#define __DT_BINDINGS_SAMSUNG_EXYNOS_USI_H
+
+#define USI_V2_NONE		0
+#define USI_V2_UART		1
+#define USI_V2_SPI		2
+#define USI_V2_I2C		3
+
+#endif /* __DT_BINDINGS_SAMSUNG_EXYNOS_USI_H */
-- 
cgit v1.2.3


From 614b7a1f28f401332736235a7d2d17ceda16945c Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Fri, 10 Dec 2021 20:56:36 +0100
Subject: bareudp: Remove bareudp_dev_create()

There's no user for this function.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/bareudp.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/net/bareudp.h b/include/net/bareudp.h
index dc65a0d71d9b..8f07a91e0f25 100644
--- a/include/net/bareudp.h
+++ b/include/net/bareudp.h
@@ -14,10 +14,6 @@ struct bareudp_conf {
 	bool multi_proto_mode;
 };
 
-struct net_device *bareudp_dev_create(struct net *net, const char *name,
-				      u8 name_assign_type,
-				      struct bareudp_conf *info);
-
 static inline bool netif_is_bareudp(const struct net_device *dev)
 {
 	return dev->rtnl_link_ops &&
-- 
cgit v1.2.3


From dcdd77ee55a70b6ddfcfbe8e6bbf6d002994644c Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Fri, 10 Dec 2021 20:56:39 +0100
Subject: bareudp: Move definition of struct bareudp_conf to bareudp.c

This structure is used only in bareudp.c.

While there, adjust include files: we need netdevice.h, not skbuff.h.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/bareudp.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/bareudp.h b/include/net/bareudp.h
index 8f07a91e0f25..17610c8d6361 100644
--- a/include/net/bareudp.h
+++ b/include/net/bareudp.h
@@ -3,17 +3,10 @@
 #ifndef __NET_BAREUDP_H
 #define __NET_BAREUDP_H
 
+#include <linux/netdevice.h>
 #include <linux/types.h>
-#include <linux/skbuff.h>
 #include <net/rtnetlink.h>
 
-struct bareudp_conf {
-	__be16 ethertype;
-	__be16 port;
-	u16 sport_min;
-	bool multi_proto_mode;
-};
-
 static inline bool netif_is_bareudp(const struct net_device *dev)
 {
 	return dev->rtnl_link_ops &&
-- 
cgit v1.2.3


From 3c118547f87e930d45a5787e386734015dd93b32 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 10 Dec 2021 21:29:59 +0100
Subject: u64_stats: Disable preemption on 32bit UP+SMP PREEMPT_RT during
 updates.

On PREEMPT_RT the seqcount_t for synchronisation is required on 32bit
architectures even on UP because the softirq (and the threaded IRQ handler) can
be preempted.

With the seqcount_t for synchronisation, a reader with higher priority can
preempt the writer and then spin endlessly in read_seqcount_begin() while the
writer can't make progress.

To avoid such a lock up on PREEMPT_RT the writer must disable preemption during
the update. There is no need to disable interrupts because no writer is using
this API in hard-IRQ context on PREEMPT_RT.

Disable preemption on 32bit-RT within the u64_stats write section.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/u64_stats_sync.h | 42 ++++++++++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index e8ec116c916b..6ad4e9032d53 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -66,7 +66,7 @@
 #include <linux/seqlock.h>
 
 struct u64_stats_sync {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
 	seqcount_t	seq;
 #endif
 };
@@ -125,7 +125,7 @@ static inline void u64_stats_inc(u64_stats_t *p)
 }
 #endif
 
-#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
 #define u64_stats_init(syncp)	seqcount_init(&(syncp)->seq)
 #else
 static inline void u64_stats_init(struct u64_stats_sync *syncp)
@@ -135,15 +135,19 @@ static inline void u64_stats_init(struct u64_stats_sync *syncp)
 
 static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
 	write_seqcount_begin(&syncp->seq);
 #endif
 }
 
 static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
 	write_seqcount_end(&syncp->seq);
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
 #endif
 }
 
@@ -152,8 +156,11 @@ u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
 {
 	unsigned long flags = 0;
 
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
-	local_irq_save(flags);
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
+	else
+		local_irq_save(flags);
 	write_seqcount_begin(&syncp->seq);
 #endif
 	return flags;
@@ -163,15 +170,18 @@ static inline void
 u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
 				unsigned long flags)
 {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
 	write_seqcount_end(&syncp->seq);
-	local_irq_restore(flags);
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
+	else
+		local_irq_restore(flags);
 #endif
 }
 
 static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
 	return read_seqcount_begin(&syncp->seq);
 #else
 	return 0;
@@ -180,7 +190,7 @@ static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *
 
 static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
 	preempt_disable();
 #endif
 	return __u64_stats_fetch_begin(syncp);
@@ -189,7 +199,7 @@ static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *sy
 static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
 					 unsigned int start)
 {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
 	return read_seqcount_retry(&syncp->seq, start);
 #else
 	return false;
@@ -199,7 +209,7 @@ static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
 static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
 					 unsigned int start)
 {
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
 	preempt_enable();
 #endif
 	return __u64_stats_fetch_retry(syncp, start);
@@ -213,7 +223,9 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
  */
 static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
+	preempt_disable();
+#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
 	local_irq_disable();
 #endif
 	return __u64_stats_fetch_begin(syncp);
@@ -222,7 +234,9 @@ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync
 static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
 					     unsigned int start)
 {
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
+	preempt_enable();
+#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
 	local_irq_enable();
 #endif
 	return __u64_stats_fetch_retry(syncp, start);
-- 
cgit v1.2.3


From bae9401dff62d1ac46504a343db8a69e5ac390f6 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 23 Nov 2021 14:20:46 +0100
Subject: usb: Add Xen pvUSB protocol description

Add the definition of pvUSB protocol used between the pvUSB frontend in
a Xen domU and the pvUSB backend in a Xen driver domain (usually Dom0).

This header was originally provided by Fujitsu for Xen based on Linux
2.6.18.

Changes are:
- adapt to Linux kernel style guide
- use Xen namespace
- add lots of comments
- don't use kernel internal defines

Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20211123132048.5335-2-jgross@suse.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/xen/interface/io/usbif.h | 405 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 405 insertions(+)
 create mode 100644 include/xen/interface/io/usbif.h

(limited to 'include')

diff --git a/include/xen/interface/io/usbif.h b/include/xen/interface/io/usbif.h
new file mode 100644
index 000000000000..a70e0b93178b
--- /dev/null
+++ b/include/xen/interface/io/usbif.h
@@ -0,0 +1,405 @@
+/* SPDX-License-Identifier: MIT */
+
+/*
+ * usbif.h
+ *
+ * USB I/O interface for Xen guest OSes.
+ *
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
+ * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
+ */
+
+#ifndef __XEN_PUBLIC_IO_USBIF_H__
+#define __XEN_PUBLIC_IO_USBIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Detailed Interface Description
+ * ==============================
+ * The pvUSB interface is using a split driver design: a frontend driver in
+ * the guest and a backend driver in a driver domain (normally dom0) having
+ * access to the physical USB device(s) being passed to the guest.
+ *
+ * The frontend and backend drivers use XenStore to initiate the connection
+ * between them, the I/O activity is handled via two shared ring pages and an
+ * event channel. As the interface between frontend and backend is at the USB
+ * host connector level, multiple (up to 31) physical USB devices can be
+ * handled by a single connection.
+ *
+ * The Xen pvUSB device name is "qusb", so the frontend's XenStore entries are
+ * to be found under "device/qusb", while the backend's XenStore entries are
+ * under "backend/<guest-dom-id>/qusb".
+ *
+ * When a new pvUSB connection is established, the frontend needs to setup the
+ * two shared ring pages for communication and the event channel. The ring
+ * pages need to be made available to the backend via the grant table
+ * interface.
+ *
+ * One of the shared ring pages is used by the backend to inform the frontend
+ * about USB device plug events (device to be added or removed). This is the
+ * "conn-ring".
+ *
+ * The other ring page is used for USB I/O communication (requests and
+ * responses). This is the "urb-ring".
+ *
+ * Feature and Parameter Negotiation
+ * =================================
+ * The two halves of a Xen pvUSB driver utilize nodes within the XenStore to
+ * communicate capabilities and to negotiate operating parameters. This
+ * section enumerates these nodes which reside in the respective front and
+ * backend portions of the XenStore, following the XenBus convention.
+ *
+ * Any specified default value is in effect if the corresponding XenBus node
+ * is not present in the XenStore.
+ *
+ * XenStore nodes in sections marked "PRIVATE" are solely for use by the
+ * driver side whose XenBus tree contains them.
+ *
+ *****************************************************************************
+ *                            Backend XenBus Nodes
+ *****************************************************************************
+ *
+ *------------------ Backend Device Identification (PRIVATE) ------------------
+ *
+ * num-ports
+ *      Values:         unsigned [1...31]
+ *
+ *      Number of ports for this (virtual) USB host connector.
+ *
+ * usb-ver
+ *      Values:         unsigned [1...2]
+ *
+ *      USB version of this host connector: 1 = USB 1.1, 2 = USB 2.0.
+ *
+ * port/[1...31]
+ *      Values:         string
+ *
+ *      Physical USB device connected to the given port, e.g. "3-1.5".
+ *
+ *****************************************************************************
+ *                            Frontend XenBus Nodes
+ *****************************************************************************
+ *
+ *----------------------- Request Transport Parameters -----------------------
+ *
+ * event-channel
+ *      Values:         unsigned
+ *
+ *      The identifier of the Xen event channel used to signal activity
+ *      in the ring buffer.
+ *
+ * urb-ring-ref
+ *      Values:         unsigned
+ *
+ *      The Xen grant reference granting permission for the backend to map
+ *      the sole page in a single page sized ring buffer. This is the ring
+ *      buffer for urb requests.
+ *
+ * conn-ring-ref
+ *      Values:         unsigned
+ *
+ *      The Xen grant reference granting permission for the backend to map
+ *      the sole page in a single page sized ring buffer. This is the ring
+ *      buffer for connection/disconnection requests.
+ *
+ * protocol
+ *      Values:         string (XEN_IO_PROTO_ABI_*)
+ *      Default Value:  XEN_IO_PROTO_ABI_NATIVE
+ *
+ *      The machine ABI rules governing the format of all ring request and
+ *      response structures.
+ *
+ * Protocol Description
+ * ====================
+ *
+ *-------------------------- USB device plug events --------------------------
+ *
+ * USB device plug events are send via the "conn-ring" shared page. As only
+ * events are being sent, the respective requests from the frontend to the
+ * backend are just dummy ones.
+ * The events sent to the frontend have the following layout:
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |               id                |    portnum     |     speed      | 4
+ * +----------------+----------------+----------------+----------------+
+ *   id - uint16_t, event id (taken from the actual frontend dummy request)
+ *   portnum - uint8_t, port number (1 ... 31)
+ *   speed - uint8_t, device XENUSB_SPEED_*, XENUSB_SPEED_NONE == unplug
+ *
+ * The dummy request:
+ *         0                1        octet
+ * +----------------+----------------+
+ * |               id                | 2
+ * +----------------+----------------+
+ *   id - uint16_t, guest supplied value (no need for being unique)
+ *
+ *-------------------------- USB I/O request ---------------------------------
+ *
+ * A single USB I/O request on the "urb-ring" has the following layout:
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |               id                |         nr_buffer_segs          | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                               pipe                                | 8
+ * +----------------+----------------+----------------+----------------+
+ * |         transfer_flags          |          buffer_length          | 12
+ * +----------------+----------------+----------------+----------------+
+ * |                       request type specific                       | 16
+ * |                               data                                | 20
+ * +----------------+----------------+----------------+----------------+
+ * |                              seg[0]                               | 24
+ * |                               data                                | 28
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |             seg[XENUSB_MAX_SEGMENTS_PER_REQUEST - 1]              | 144
+ * |                               data                                | 148
+ * +----------------+----------------+----------------+----------------+
+ * Bit field bit number 0 is always least significant bit, undefined bits must
+ * be zero.
+ *   id - uint16_t, guest supplied value
+ *   nr_buffer_segs - uint16_t, number of segment entries in seg[] array
+ *   pipe - uint32_t, bit field with multiple information:
+ *     bits 0-4: port request to send to
+ *     bit 5: unlink request with specified id (cancel I/O) if set (see below)
+ *     bit 7: direction (1 = read from device)
+ *     bits 8-14: device number on port
+ *     bits 15-18: endpoint of device
+ *     bits 30-31: request type: 00 = isochronous, 01 = interrupt,
+ *                               10 = control, 11 = bulk
+ *   transfer_flags - uint16_t, bit field with processing flags:
+ *     bit 0: less data than specified allowed
+ *   buffer_length - uint16_t, total length of data
+ *   request type specific data - 8 bytes, see below
+ *   seg[] - array with 8 byte elements, see below
+ *
+ * Request type specific data for isochronous request:
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |            interval             |           start_frame           | 4
+ * +----------------+----------------+----------------+----------------+
+ * |       number_of_packets         |       nr_frame_desc_segs        | 8
+ * +----------------+----------------+----------------+----------------+
+ *   interval - uint16_t, time interval in msecs between frames
+ *   start_frame - uint16_t, start frame number
+ *   number_of_packets - uint16_t, number of packets to transfer
+ *   nr_frame_desc_segs - uint16_t number of seg[] frame descriptors elements
+ *
+ * Request type specific data for interrupt request:
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |            interval             |                0                | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                                 0                                 | 8
+ * +----------------+----------------+----------------+----------------+
+ *   interval - uint16_t, time in msecs until interruption
+ *
+ * Request type specific data for control request:
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |                      data of setup packet                         | 4
+ * |                                                                   | 8
+ * +----------------+----------------+----------------+----------------+
+ *
+ * Request type specific data for bulk request:
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |                                 0                                 | 4
+ * |                                 0                                 | 8
+ * +----------------+----------------+----------------+----------------+
+ *
+ * Request type specific data for unlink request:
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |           unlink_id             |                0                | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                                 0                                 | 8
+ * +----------------+----------------+----------------+----------------+
+ *   unlink_id - uint16_t, request id of request to terminate
+ *
+ * seg[] array element layout:
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |                               gref                                | 4
+ * +----------------+----------------+----------------+----------------+
+ * |             offset              |             length              | 8
+ * +----------------+----------------+----------------+----------------+
+ *   gref - uint32_t, grant reference of buffer page
+ *   offset - uint16_t, offset of buffer start in page
+ *   length - uint16_t, length of buffer in page
+ *
+ *-------------------------- USB I/O response --------------------------------
+ *
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |               id                |          start_frame            | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                              status                               | 8
+ * +----------------+----------------+----------------+----------------+
+ * |                          actual_length                            | 12
+ * +----------------+----------------+----------------+----------------+
+ * |                           error_count                             | 16
+ * +----------------+----------------+----------------+----------------+
+ *   id - uint16_t, id of the request this response belongs to
+ *   start_frame - uint16_t, start_frame this response (iso requests only)
+ *   status - int32_t, XENUSB_STATUS_* (non-iso requests)
+ *   actual_length - uint32_t, actual size of data transferred
+ *   error_count - uint32_t, number of errors (iso requests)
+ */
+
+enum xenusb_spec_version {
+	XENUSB_VER_UNKNOWN = 0,
+	XENUSB_VER_USB11,
+	XENUSB_VER_USB20,
+	XENUSB_VER_USB30,	/* not supported yet */
+};
+
+/*
+ *  USB pipe in xenusb_request
+ *
+ *  - port number:      bits 0-4
+ *                              (USB_MAXCHILDREN is 31)
+ *
+ *  - operation flag:   bit 5
+ *                              (0 = submit urb,
+ *                               1 = unlink urb)
+ *
+ *  - direction:        bit 7
+ *                              (0 = Host-to-Device [Out]
+ *                               1 = Device-to-Host [In])
+ *
+ *  - device address:   bits 8-14
+ *
+ *  - endpoint:         bits 15-18
+ *
+ *  - pipe type:        bits 30-31
+ *                              (00 = isochronous, 01 = interrupt,
+ *                               10 = control, 11 = bulk)
+ */
+
+#define XENUSB_PIPE_PORT_MASK	0x0000001f
+#define XENUSB_PIPE_UNLINK	0x00000020
+#define XENUSB_PIPE_DIR		0x00000080
+#define XENUSB_PIPE_DEV_MASK	0x0000007f
+#define XENUSB_PIPE_DEV_SHIFT	8
+#define XENUSB_PIPE_EP_MASK	0x0000000f
+#define XENUSB_PIPE_EP_SHIFT	15
+#define XENUSB_PIPE_TYPE_MASK	0x00000003
+#define XENUSB_PIPE_TYPE_SHIFT	30
+#define XENUSB_PIPE_TYPE_ISOC	0
+#define XENUSB_PIPE_TYPE_INT	1
+#define XENUSB_PIPE_TYPE_CTRL	2
+#define XENUSB_PIPE_TYPE_BULK	3
+
+#define xenusb_pipeportnum(pipe)		((pipe) & XENUSB_PIPE_PORT_MASK)
+#define xenusb_setportnum_pipe(pipe, portnum)	((pipe) | (portnum))
+
+#define xenusb_pipeunlink(pipe)			((pipe) & XENUSB_PIPE_UNLINK)
+#define xenusb_pipesubmit(pipe)			(!xenusb_pipeunlink(pipe))
+#define xenusb_setunlink_pipe(pipe)		((pipe) | XENUSB_PIPE_UNLINK)
+
+#define xenusb_pipein(pipe)			((pipe) & XENUSB_PIPE_DIR)
+#define xenusb_pipeout(pipe)			(!xenusb_pipein(pipe))
+
+#define xenusb_pipedevice(pipe)			\
+	(((pipe) >> XENUSB_PIPE_DEV_SHIFT) & XENUSB_PIPE_DEV_MASK)
+
+#define xenusb_pipeendpoint(pipe)		\
+	(((pipe) >> XENUSB_PIPE_EP_SHIFT) & XENUSB_PIPE_EP_MASK)
+
+#define xenusb_pipetype(pipe)			\
+	(((pipe) >> XENUSB_PIPE_TYPE_SHIFT) & XENUSB_PIPE_TYPE_MASK)
+#define xenusb_pipeisoc(pipe)	(xenusb_pipetype(pipe) == XENUSB_PIPE_TYPE_ISOC)
+#define xenusb_pipeint(pipe)	(xenusb_pipetype(pipe) == XENUSB_PIPE_TYPE_INT)
+#define xenusb_pipectrl(pipe)	(xenusb_pipetype(pipe) == XENUSB_PIPE_TYPE_CTRL)
+#define xenusb_pipebulk(pipe)	(xenusb_pipetype(pipe) == XENUSB_PIPE_TYPE_BULK)
+
+#define XENUSB_MAX_SEGMENTS_PER_REQUEST (16)
+#define XENUSB_MAX_PORTNR		31
+#define XENUSB_RING_SIZE		4096
+
+/*
+ * RING for transferring urbs.
+ */
+struct xenusb_request_segment {
+	grant_ref_t gref;
+	uint16_t offset;
+	uint16_t length;
+};
+
+struct xenusb_urb_request {
+	uint16_t id;			/* request id */
+	uint16_t nr_buffer_segs;	/* number of urb->transfer_buffer segments */
+
+	/* basic urb parameter */
+	uint32_t pipe;
+	uint16_t transfer_flags;
+#define XENUSB_SHORT_NOT_OK	0x0001
+	uint16_t buffer_length;
+	union {
+		uint8_t ctrl[8];	/* setup_packet (Ctrl) */
+
+		struct {
+			uint16_t interval;	/* maximum (1024*8) in usb core */
+			uint16_t start_frame;	/* start frame */
+			uint16_t number_of_packets;	/* number of ISO packet */
+			uint16_t nr_frame_desc_segs;	/* number of iso_frame_desc segments */
+		} isoc;
+
+		struct {
+			uint16_t interval;	/* maximum (1024*8) in usb core */
+			uint16_t pad[3];
+		} intr;
+
+		struct {
+			uint16_t unlink_id;	/* unlink request id */
+			uint16_t pad[3];
+		} unlink;
+
+	} u;
+
+	/* urb data segments */
+	struct xenusb_request_segment seg[XENUSB_MAX_SEGMENTS_PER_REQUEST];
+};
+
+struct xenusb_urb_response {
+	uint16_t id;		/* request id */
+	uint16_t start_frame;	/* start frame (ISO) */
+	int32_t status;		/* status (non-ISO) */
+#define XENUSB_STATUS_OK	0
+#define XENUSB_STATUS_NODEV	(-19)
+#define XENUSB_STATUS_INVAL	(-22)
+#define XENUSB_STATUS_STALL	(-32)
+#define XENUSB_STATUS_IOERROR	(-71)
+#define XENUSB_STATUS_BABBLE	(-75)
+#define XENUSB_STATUS_SHUTDOWN	(-108)
+	int32_t actual_length;	/* actual transfer length */
+	int32_t error_count;	/* number of ISO errors */
+};
+
+DEFINE_RING_TYPES(xenusb_urb, struct xenusb_urb_request, struct xenusb_urb_response);
+#define XENUSB_URB_RING_SIZE __CONST_RING_SIZE(xenusb_urb, XENUSB_RING_SIZE)
+
+/*
+ * RING for notifying connect/disconnect events to frontend
+ */
+struct xenusb_conn_request {
+	uint16_t id;
+};
+
+struct xenusb_conn_response {
+	uint16_t id;		/* request id */
+	uint8_t portnum;	/* port number */
+	uint8_t speed;		/* usb_device_speed */
+#define XENUSB_SPEED_NONE	0
+#define XENUSB_SPEED_LOW	1
+#define XENUSB_SPEED_FULL	2
+#define XENUSB_SPEED_HIGH	3
+};
+
+DEFINE_RING_TYPES(xenusb_conn, struct xenusb_conn_request, struct xenusb_conn_response);
+#define XENUSB_CONN_RING_SIZE __CONST_RING_SIZE(xenusb_conn, XENUSB_RING_SIZE)
+
+#endif /* __XEN_PUBLIC_IO_USBIF_H__ */
-- 
cgit v1.2.3


From 4bc5e64e6cf37007e436970024e5998ee0935651 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Fri, 26 Nov 2021 01:13:32 +0100
Subject: efi: Move efifb_setup_from_dmi() prototype from arch headers

Commit 8633ef82f101 ("drivers/firmware: consolidate EFI framebuffer setup
for all arches") made the Generic System Framebuffers (sysfb) driver able
to be built on non-x86 architectures.

But it left the efifb_setup_from_dmi() function prototype declaration in
the architecture specific headers. This could lead to the following
compiler warning as reported by the kernel test robot:

   drivers/firmware/efi/sysfb_efi.c:70:6: warning: no previous prototype for function 'efifb_setup_from_dmi' [-Wmissing-prototypes]
   void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
        ^
   drivers/firmware/efi/sysfb_efi.c:70:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   void efifb_setup_from_dmi(struct screen_info *si, const char *opt)

Fixes: 8633ef82f101 ("drivers/firmware: consolidate EFI framebuffer setup for all arches")
Reported-by: kernel test robot <lkp@intel.com>
Cc: <stable@vger.kernel.org> # 5.15.x
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://lore.kernel.org/r/20211126001333.555514-1-javierm@redhat.com
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/linux/efi.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index dbd39b20e034..ef8dbc0a1522 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1283,4 +1283,10 @@ static inline struct efi_mokvar_table_entry *efi_mokvar_entry_find(
 }
 #endif
 
+#ifdef CONFIG_SYSFB
+extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
+#else
+static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) { }
+#endif
+
 #endif /* _LINUX_EFI_H */
-- 
cgit v1.2.3


From f92c1e183604c20ce00eb889315fdaa8f2d9e509 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 8 Dec 2021 20:32:44 +0100
Subject: bpf: Add get_func_[arg|ret|arg_cnt] helpers

Adding following helpers for tracing programs:

Get n-th argument of the traced function:
  long bpf_get_func_arg(void *ctx, u32 n, u64 *value)

Get return value of the traced function:
  long bpf_get_func_ret(void *ctx, u64 *value)

Get arguments count of the traced function:
  long bpf_get_func_arg_cnt(void *ctx)

The trampoline now stores number of arguments on ctx-8
address, so it's easy to verify argument index and find
return value argument's position.

Moving function ip address on the trampoline stack behind
the number of functions arguments, so it's now stored on
ctx-16 address if it's needed.

All helpers above are inlined by verifier.

Also bit unrelated small change - using newly added function
bpf_prog_has_trampoline in check_get_func_ip.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211208193245.172141-5-jolsa@kernel.org
---
 include/linux/bpf.h      |  5 +++++
 include/uapi/linux/bpf.h | 28 ++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7a40022e3d00..965fffaf0308 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -777,6 +777,7 @@ void bpf_ksym_add(struct bpf_ksym *ksym);
 void bpf_ksym_del(struct bpf_ksym *ksym);
 int bpf_jit_charge_modmem(u32 pages);
 void bpf_jit_uncharge_modmem(u32 pages);
+bool bpf_prog_has_trampoline(const struct bpf_prog *prog);
 #else
 static inline int bpf_trampoline_link_prog(struct bpf_prog *prog,
 					   struct bpf_trampoline *tr)
@@ -805,6 +806,10 @@ static inline bool is_bpf_image_address(unsigned long address)
 {
 	return false;
 }
+static inline bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
+{
+	return false;
+}
 #endif
 
 struct bpf_func_info_aux {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2820c77e4846..b0383d371b9a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4993,6 +4993,31 @@ union bpf_attr {
  *		An integer less than, equal to, or greater than zero
  *		if the first **s1_sz** bytes of **s1** is found to be
  *		less than, to match, or be greater than **s2**.
+ *
+ * long bpf_get_func_arg(void *ctx, u32 n, u64 *value)
+ *	Description
+ *		Get **n**-th argument (zero based) of the traced function (for tracing programs)
+ *		returned in **value**.
+ *
+ *	Return
+ *		0 on success.
+ *		**-EINVAL** if n >= arguments count of traced function.
+ *
+ * long bpf_get_func_ret(void *ctx, u64 *value)
+ *	Description
+ *		Get return value of the traced function (for tracing programs)
+ *		in **value**.
+ *
+ *	Return
+ *		0 on success.
+ *		**-EOPNOTSUPP** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN.
+ *
+ * long bpf_get_func_arg_cnt(void *ctx)
+ *	Description
+ *		Get number of arguments of the traced function (for tracing programs).
+ *
+ *	Return
+ *		The number of arguments of the traced function.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5178,6 +5203,9 @@ union bpf_attr {
 	FN(find_vma),			\
 	FN(loop),			\
 	FN(strncmp),			\
+	FN(get_func_arg),		\
+	FN(get_func_ret),		\
+	FN(get_func_arg_cnt),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From beba81faad86fc2bad567b1c029d6a000a43ca78 Mon Sep 17 00:00:00 2001
From: Aswath Govindraju <a-govindraju@ti.com>
Date: Tue, 7 Dec 2021 13:39:01 +0530
Subject: dt-bindings: pinctrl: k3: Introduce pinmux definitions for J721S2

Add pinctrl macros for J721S2 SoC. These macro definitions are
similar to that of J721E, but adding new definitions to avoid
any naming confusions in the soc dts files.

checkpatch insists the following error exists:
ERROR: Macros with complex values should be enclosed in parentheses

However, we do not need parentheses enclosing the values for this
macro as we do intend it to generate two separate values as has been
done for other similar platforms.

Signed-off-by: Aswath Govindraju <a-govindraju@ti.com>
Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
Reviewed-by: Kishon Vijay Abraham I <kishon@ti.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20211207080904.14324-3-a-govindraju@ti.com
---
 include/dt-bindings/pinctrl/k3.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/pinctrl/k3.h b/include/dt-bindings/pinctrl/k3.h
index e085f102b283..63e038e36ca3 100644
--- a/include/dt-bindings/pinctrl/k3.h
+++ b/include/dt-bindings/pinctrl/k3.h
@@ -38,4 +38,7 @@
 #define AM64X_IOPAD(pa, val, muxmode)		(((pa) & 0x1fff)) ((val) | (muxmode))
 #define AM64X_MCU_IOPAD(pa, val, muxmode)	(((pa) & 0x1fff)) ((val) | (muxmode))
 
+#define J721S2_IOPAD(pa, val, muxmode)		(((pa) & 0x1fff)) ((val) | (muxmode))
+#define J721S2_WKUP_IOPAD(pa, val, muxmode)	(((pa) & 0x1fff)) ((val) | (muxmode))
+
 #endif
-- 
cgit v1.2.3


From 0e25498f8cd43c1b5aa327f373dd094e9a006da7 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 28 Jun 2021 14:52:01 -0500
Subject: exit: Add and use make_task_dead.

There are two big uses of do_exit.  The first is it's design use to be
the guts of the exit(2) system call.  The second use is to terminate
a task after something catastrophic has happened like a NULL pointer
in kernel code.

Add a function make_task_dead that is initialy exactly the same as
do_exit to cover the cases where do_exit is called to handle
catastrophic failure.  In time this can probably be reduced to just a
light wrapper around do_task_dead. For now keep it exactly the same so
that there will be no behavioral differences introducing this new
concept.

Replace all of the uses of do_exit that use it for catastraphic
task cleanup with make_task_dead to make it clear what the code
is doing.

As part of this rename rewind_stack_do_exit
rewind_stack_and_make_dead.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/task.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index ba88a6987400..2d4bbd9c3278 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -59,6 +59,7 @@ extern void sched_post_fork(struct task_struct *p,
 extern void sched_dead(struct task_struct *p);
 
 void __noreturn do_task_dead(void);
+void __noreturn make_task_dead(int signr);
 
 extern void proc_caches_init(void);
 
-- 
cgit v1.2.3


From bbda86e988d4c124e4cfa816291cbd583ae8bfb1 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 22 Nov 2021 10:27:36 -0600
Subject: exit: Implement kthread_exit

The way the per task_struct exit_code is used by kernel threads is not
quite compatible how it is used by userspace applications.  The low
byte of the userspace exit_code value encodes the exit signal.  While
kthreads just use the value as an int holding ordinary kernel function
exit status like -EPERM.

Add kthread_exit to clearly separate the two kinds of uses.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/kthread.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 346b0f269161..22c43d419687 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -70,6 +70,7 @@ void *kthread_probe_data(struct task_struct *k);
 int kthread_park(struct task_struct *k);
 void kthread_unpark(struct task_struct *k);
 void kthread_parkme(void);
+void kthread_exit(long result) __noreturn;
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
-- 
cgit v1.2.3


From ca3574bd653aba234a4b31955f2778947403be16 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 3 Dec 2021 11:00:19 -0600
Subject: exit: Rename module_put_and_exit to module_put_and_kthread_exit

Update module_put_and_exit to call kthread_exit instead of do_exit.

Change the name to reflect this change in functionality.  All of the
users of module_put_and_exit are causing the current kthread to exit
so this change makes it clear what is happening.  There is no
functional change.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/module.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/module.h b/include/linux/module.h
index c9f1200b2312..f03be97e9ec1 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -595,9 +595,9 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 /* Look for this name: can be of form module:name. */
 unsigned long module_kallsyms_lookup_name(const char *name);
 
-extern void __noreturn __module_put_and_exit(struct module *mod,
+extern void __noreturn __module_put_and_kthread_exit(struct module *mod,
 			long code);
-#define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code)
+#define module_put_and_kthread_exit(code) __module_put_and_kthread_exit(THIS_MODULE, code)
 
 #ifdef CONFIG_MODULE_UNLOAD
 int module_refcount(struct module *mod);
@@ -790,7 +790,7 @@ static inline int unregister_module_notifier(struct notifier_block *nb)
 	return 0;
 }
 
-#define module_put_and_exit(code) do_exit(code)
+#define module_put_and_kthread_exit(code) kthread_exit(code)
 
 static inline void print_modules(void)
 {
-- 
cgit v1.2.3


From cead18552660702a4a46f58e65188fe5f36e9dfe Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 22 Nov 2021 11:15:19 -0600
Subject: exit: Rename complete_and_exit to kthread_complete_and_exit

Update complete_and_exit to call kthread_exit instead of do_exit.

Change the name to reflect this change in functionality.  All of the
users of complete_and_exit are causing the current kthread to exit so
this change makes it clear what is happening.

Move the implementation of kthread_complete_and_exit from
kernel/exit.c to to kernel/kthread.c.  As this function is kthread
specific it makes most sense to live with the kthread functions.

There are no functional change.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/kernel.h  | 1 -
 include/linux/kthread.h | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 77755ac3e189..055eb203c00e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -187,7 +187,6 @@ static inline void might_fault(void) { }
 #endif
 
 void do_exit(long error_code) __noreturn;
-void complete_and_exit(struct completion *, long) __noreturn;
 
 extern int num_to_str(char *buf, int size,
 		      unsigned long long num, unsigned int width);
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 22c43d419687..d86a7e3b9a52 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -71,6 +71,7 @@ int kthread_park(struct task_struct *k);
 void kthread_unpark(struct task_struct *k);
 void kthread_parkme(void);
 void kthread_exit(long result) __noreturn;
+void kthread_complete_and_exit(struct completion *, long) __noreturn;
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
-- 
cgit v1.2.3


From 40966e316f86b8cfd83abd31ccb4df729309d3e7 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 2 Dec 2021 09:56:14 -0600
Subject: kthread: Ensure struct kthread is present for all kthreads

Today the rules are a bit iffy and arbitrary about which kernel
threads have struct kthread present.  Both idle threads and thread
started with create_kthread want struct kthread present so that is
effectively all kernel threads.  Make the rule that if PF_KTHREAD
and the task is running then struct kthread is present.

This will allow the kernel thread code to using tsk->exit_code
with different semantics from ordinary processes.

To make ensure that struct kthread is present for all
kernel threads move it's allocation into copy_process.

Add a deallocation of struct kthread in exec for processes
that were kernel threads.

Move the allocation of struct kthread for the initial thread
earlier so that it is not repeated for each additional idle
thread.

Move the initialization of struct kthread into set_kthread_struct
so that the structure is always and reliably initailized.

Clear set_child_tid in free_kthread_struct to ensure the kthread
struct is reliably freed during exec.  The function
free_kthread_struct does not need to clear vfork_done during exec as
exec_mm_release called from exec_mmap has already cleared vfork_done.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/kthread.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index d86a7e3b9a52..4f3433afb54b 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -33,7 +33,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
 					  unsigned int cpu,
 					  const char *namefmt);
 
-void set_kthread_struct(struct task_struct *p);
+bool set_kthread_struct(struct task_struct *p);
 
 void kthread_set_per_cpu(struct task_struct *k, int cpu);
 bool kthread_is_per_cpu(struct task_struct *k);
-- 
cgit v1.2.3


From df5e49c880ea0776806b8a9f8ab95e035272cf6f Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 29 Nov 2021 15:51:25 +1100
Subject: SUNRPC: change svc_get() to return the svc.

It is common for 'get' functions to return the object that was 'got',
and there are a couple of places where users of svc_get() would be a
little simpler if svc_get() did that.

Make it so.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 0ae28ae6caf2..5d9568953fcd 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -120,9 +120,10 @@ struct svc_serv {
  * change the number of threads.  Horrible, but there it is.
  * Should be called with the "service mutex" held.
  */
-static inline void svc_get(struct svc_serv *serv)
+static inline struct svc_serv *svc_get(struct svc_serv *serv)
 {
 	serv->sv_nrthreads++;
+	return serv;
 }
 
 /*
-- 
cgit v1.2.3


From 8c62d12740a1450d2e8456d5747f440e10db281a Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 29 Nov 2021 15:51:25 +1100
Subject: SUNRPC/NFSD: clean up get/put functions.

svc_destroy() is poorly named - it doesn't necessarily destroy the svc,
it might just reduce the ref count.
nfsd_destroy() is poorly named for the same reason.

This patch:
 - removes the refcount functionality from svc_destroy(), moving it to
   a new svc_put().  Almost all previous callers of svc_destroy() now
   call svc_put().
 - renames nfsd_destroy() to nfsd_put() and improves the code, using
   the new svc_destroy() rather than svc_put()
 - removes a few comments that explain the important for balanced
   get/put calls.  This should be obvious.

The only non-trivial part of this is that svc_destroy() would call
svc_sock_update() on a non-final decrement.  It can no longer do that,
and svc_put() isn't really a good place of it.  This call is now made
from svc_exit_thread() which seems like a good place.  This makes the
call *before* sv_nrthreads is decremented rather than after.  This
is not particularly important as the call just sets a flag which
causes sv_nrthreads set be checked later.  A subsequent patch will
improve the ordering.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 5d9568953fcd..73d56d33a36d 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -114,8 +114,13 @@ struct svc_serv {
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 };
 
-/*
- * We use sv_nrthreads as a reference count.  svc_destroy() drops
+/**
+ * svc_get() - increment reference count on a SUNRPC serv
+ * @serv:  the svc_serv to have count incremented
+ *
+ * Returns: the svc_serv that was passed in.
+ *
+ * We use sv_nrthreads as a reference count.  svc_put() drops
  * this refcount, so we need to bump it up around operations that
  * change the number of threads.  Horrible, but there it is.
  * Should be called with the "service mutex" held.
@@ -126,6 +131,22 @@ static inline struct svc_serv *svc_get(struct svc_serv *serv)
 	return serv;
 }
 
+void svc_destroy(struct svc_serv *serv);
+
+/**
+ * svc_put - decrement reference count on a SUNRPC serv
+ * @serv:  the svc_serv to have count decremented
+ *
+ * When the reference count reaches zero, svc_destroy()
+ * is called to clean up and free the serv.
+ */
+static inline void svc_put(struct svc_serv *serv)
+{
+	serv->sv_nrthreads -= 1;
+	if (serv->sv_nrthreads == 0)
+		svc_destroy(serv);
+}
+
 /*
  * Maximum payload size supported by a kernel RPC server.
  * This is use to determine the max number of pages nfsd is
@@ -515,7 +536,6 @@ struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
 int		   svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
 int		   svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int);
 int		   svc_pool_stats_open(struct svc_serv *serv, struct file *file);
-void		   svc_destroy(struct svc_serv *);
 void		   svc_shutdown_net(struct svc_serv *, struct net *);
 int		   svc_process(struct svc_rqst *);
 int		   bc_svc_process(struct svc_serv *, struct rpc_rqst *,
-- 
cgit v1.2.3


From ec52361df99b490f6af412b046df9799b92c1050 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 29 Nov 2021 15:51:25 +1100
Subject: SUNRPC: stop using ->sv_nrthreads as a refcount

The use of sv_nrthreads as a general refcount results in clumsy code, as
is seen by various comments needed to explain the situation.

This patch introduces a 'struct kref' and uses that for reference
counting, leaving sv_nrthreads to be a pure count of threads.  The kref
is managed particularly in svc_get() and svc_put(), and also nfsd_put();

svc_destroy() now takes a pointer to the embedded kref, rather than to
the serv.

nfsd allows the svc_serv to exist with ->sv_nrhtreads being zero.  This
happens when a transport is created before the first thread is started.
To support this, a 'keep_active' flag is introduced which holds a ref on
the svc_serv.  This is set when any listening socket is successfully
added (unless there are running threads), and cleared when the number of
threads is set.  So when the last thread exits, the nfs_serv will be
destroyed.
The use of 'keep_active' replaces previous code which checked if there
were any permanent sockets.

We no longer clear ->rq_server when nfsd() exits.  This was done
to prevent svc_exit_thread() from calling svc_destroy().
Instead we take an extra reference to the svc_serv to prevent
svc_destroy() from being called.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 73d56d33a36d..3903b4ae8ac5 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -85,6 +85,7 @@ struct svc_serv {
 	struct svc_program *	sv_program;	/* RPC program */
 	struct svc_stat *	sv_stats;	/* RPC statistics */
 	spinlock_t		sv_lock;
+	struct kref		sv_refcnt;
 	unsigned int		sv_nrthreads;	/* # of server threads */
 	unsigned int		sv_maxconn;	/* max connections allowed or
 						 * '0' causing max to be based
@@ -119,19 +120,14 @@ struct svc_serv {
  * @serv:  the svc_serv to have count incremented
  *
  * Returns: the svc_serv that was passed in.
- *
- * We use sv_nrthreads as a reference count.  svc_put() drops
- * this refcount, so we need to bump it up around operations that
- * change the number of threads.  Horrible, but there it is.
- * Should be called with the "service mutex" held.
  */
 static inline struct svc_serv *svc_get(struct svc_serv *serv)
 {
-	serv->sv_nrthreads++;
+	kref_get(&serv->sv_refcnt);
 	return serv;
 }
 
-void svc_destroy(struct svc_serv *serv);
+void svc_destroy(struct kref *);
 
 /**
  * svc_put - decrement reference count on a SUNRPC serv
@@ -142,9 +138,7 @@ void svc_destroy(struct svc_serv *serv);
  */
 static inline void svc_put(struct svc_serv *serv)
 {
-	serv->sv_nrthreads -= 1;
-	if (serv->sv_nrthreads == 0)
-		svc_destroy(serv);
+	kref_put(&serv->sv_refcnt, svc_destroy);
 }
 
 /*
-- 
cgit v1.2.3


From 3409e4f1e8f239f0ed81be0b068ecf4e73e2e826 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 29 Nov 2021 15:51:25 +1100
Subject: NFSD: Make it possible to use svc_set_num_threads_sync

nfsd cannot currently use svc_set_num_threads_sync.  It instead
uses svc_set_num_threads which does *not* wait for threads to all
exit, and has a separate mechanism (nfsd_shutdown_complete) to wait
for completion.

The reason that nfsd is unlike other services is that nfsd threads can
exit separately from svc_set_num_threads being called - they die on
receipt of SIGKILL.  Also, when the last thread exits, the service must
be shut down (sockets closed).

For this, the nfsd_mutex needs to be taken, and as that mutex needs to
be held while svc_set_num_threads is called, the one cannot wait for
the other.

This patch changes the nfsd thread so that it can drop the ref on the
service without blocking on nfsd_mutex, so that svc_set_num_threads_sync
can be used:
 - if it can drop a non-last reference, it does that.  This does not
   trigger shutdown and does not require a mutex.  This will likely
   happen for all but the last thread signalled, and for all threads
   being shut down by nfsd_shutdown_threads()
 - if it can get the mutex without blocking (trylock), it does that
   and then drops the reference.  This will likely happen for the
   last thread killed by SIGKILL
 - Otherwise there might be an unrelated task holding the mutex,
   possibly in another network namespace, or nfsd_shutdown_threads()
   might be just about to get a reference on the service, after which
   we can drop ours safely.
   We cannot conveniently get wakeup notifications on these events,
   and we are unlikely to need to, so we sleep briefly and check again.

With this we can discard nfsd_shutdown_complete and
nfsd_complete_shutdown(), and switch to svc_set_num_threads_sync.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 3903b4ae8ac5..36bfc0281988 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -141,6 +141,19 @@ static inline void svc_put(struct svc_serv *serv)
 	kref_put(&serv->sv_refcnt, svc_destroy);
 }
 
+/**
+ * svc_put_not_last - decrement non-final reference count on SUNRPC serv
+ * @serv:  the svc_serv to have count decremented
+ *
+ * Returns: %true is refcount was decremented.
+ *
+ * If the refcount is 1, it is not decremented and instead failure is reported.
+ */
+static inline bool svc_put_not_last(struct svc_serv *serv)
+{
+	return refcount_dec_not_one(&serv->sv_refcnt.refcount);
+}
+
 /*
  * Maximum payload size supported by a kernel RPC server.
  * This is use to determine the max number of pages nfsd is
-- 
cgit v1.2.3


From 3ebdbe5203a874614819700d3f470724cb803709 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 29 Nov 2021 15:51:25 +1100
Subject: SUNRPC: discard svo_setup and rename svc_set_num_threads_sync()

The ->svo_setup callback serves no purpose.  It is always called from
within the same module that chooses which callback is needed.  So
discard it and call the relevant function directly.

Now that svc_set_num_threads() is no longer used remove it and rename
svc_set_num_threads_sync() to remove the "_sync" suffix.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 36bfc0281988..0b38c6eaf985 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -64,9 +64,6 @@ struct svc_serv_ops {
 	/* queue up a transport for servicing */
 	void		(*svo_enqueue_xprt)(struct svc_xprt *);
 
-	/* set up thread (or whatever) execution context */
-	int		(*svo_setup)(struct svc_serv *, struct svc_pool *, int);
-
 	/* optional module to count when adding threads (pooled svcs only) */
 	struct module	*svo_module;
 };
@@ -541,7 +538,6 @@ void		   svc_pool_map_put(void);
 struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
 			const struct svc_serv_ops *);
 int		   svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
-int		   svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int);
 int		   svc_pool_stats_open(struct svc_serv *serv, struct file *file);
 void		   svc_shutdown_net(struct svc_serv *, struct net *);
 int		   svc_process(struct svc_rqst *);
-- 
cgit v1.2.3


From cf0e124e0a489944d08fcc3c694d2b234d2cc658 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 29 Nov 2021 15:51:25 +1100
Subject: SUNRPC: move the pool_map definitions (back) into svc.c

These definitions are not used outside of svc.c, and there is no
evidence that they ever have been.  So move them into svc.c
and make the declarations 'static'.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 25 -------------------------
 1 file changed, 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 0b38c6eaf985..d69e6108cb83 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -494,29 +494,6 @@ struct svc_procedure {
 	const char *		pc_name;	/* for display */
 };
 
-/*
- * Mode for mapping cpus to pools.
- */
-enum {
-	SVC_POOL_AUTO = -1,	/* choose one of the others */
-	SVC_POOL_GLOBAL,	/* no mapping, just a single global pool
-				 * (legacy & UP mode) */
-	SVC_POOL_PERCPU,	/* one pool per cpu */
-	SVC_POOL_PERNODE	/* one pool per numa node */
-};
-
-struct svc_pool_map {
-	int count;			/* How many svc_servs use us */
-	int mode;			/* Note: int not enum to avoid
-					 * warnings about "enumeration value
-					 * not handled in switch" */
-	unsigned int npools;
-	unsigned int *pool_to;		/* maps pool id to cpu or node */
-	unsigned int *to_pool;		/* maps cpu or node to pool id */
-};
-
-extern struct svc_pool_map svc_pool_map;
-
 /*
  * Function prototypes.
  */
@@ -533,8 +510,6 @@ void		   svc_rqst_replace_page(struct svc_rqst *rqstp,
 					 struct page *page);
 void		   svc_rqst_free(struct svc_rqst *);
 void		   svc_exit_thread(struct svc_rqst *);
-unsigned int	   svc_pool_map_get(void);
-void		   svc_pool_map_put(void);
 struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
 			const struct svc_serv_ops *);
 int		   svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
-- 
cgit v1.2.3


From 6b044fbaab02292fedb17565dbb3f2528083b169 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 29 Nov 2021 15:51:25 +1100
Subject: lockd: use svc_set_num_threads() for thread start and stop

svc_set_num_threads() does everything that lockd_start_svc() does, except
set sv_maxconn.  It also (when passed 0) finds the threads and
stops them with kthread_stop().

So move the setting for sv_maxconn, and use svc_set_num_thread()

We now don't need nlmsvc_task.

Now that we use svc_set_num_threads() it makes sense to set svo_module.
This request that the thread exists with module_put_and_exit().
Also fix the documentation for svo_module to make this explicit.

svc_prepare_thread is now only used where it is defined, so it can be
made static.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index d69e6108cb83..cf175d47c6b7 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -64,7 +64,9 @@ struct svc_serv_ops {
 	/* queue up a transport for servicing */
 	void		(*svo_enqueue_xprt)(struct svc_xprt *);
 
-	/* optional module to count when adding threads (pooled svcs only) */
+	/* optional module to count when adding threads.
+	 * Thread function must call module_put_and_exit() to exit.
+	 */
 	struct module	*svo_module;
 };
 
@@ -504,8 +506,6 @@ struct svc_serv *svc_create(struct svc_program *, unsigned int,
 			    const struct svc_serv_ops *);
 struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv,
 					struct svc_pool *pool, int node);
-struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
-					struct svc_pool *pool, int node);
 void		   svc_rqst_replace_page(struct svc_rqst *rqstp,
 					 struct page *page);
 void		   svc_rqst_free(struct svc_rqst *);
-- 
cgit v1.2.3


From 5089f3d97552b0b07101e02a3fca0146b9b9d3b5 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 19 Oct 2021 15:17:21 -0400
Subject: SUNRPC: Remove low signal-to-noise tracepoints

I'm about to add more information to the server-side SUNRPC
tracepoints, so I'm going to offset the increased trace log
consumption by getting rid of some tracepoints that fire frequently
but don't offer much value.

trace_svc_xprt_received() was useful for debugging, perhaps, but
is not generally informative.

trace_svc_handle_xprt() reports largely the same information as
trace_svc_xdr_recvfrom().

As a clean-up, rename trace_svc_xprt_do_enqueue() to match
svc_xprt_dequeue().

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/trace/events/sunrpc.h | 24 +-----------------------
 1 file changed, 1 insertion(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 3a99358c262b..684cc0e322fa 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1768,7 +1768,7 @@ TRACE_EVENT(svc_xprt_create_err,
 		__entry->error)
 );
 
-TRACE_EVENT(svc_xprt_do_enqueue,
+TRACE_EVENT(svc_xprt_enqueue,
 	TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst),
 
 	TP_ARGS(xprt, rqst),
@@ -1815,7 +1815,6 @@ DECLARE_EVENT_CLASS(svc_xprt_event,
 			), \
 			TP_ARGS(xprt))
 
-DEFINE_SVC_XPRT_EVENT(received);
 DEFINE_SVC_XPRT_EVENT(no_write_space);
 DEFINE_SVC_XPRT_EVENT(close);
 DEFINE_SVC_XPRT_EVENT(detach);
@@ -1902,27 +1901,6 @@ TRACE_EVENT(svc_alloc_arg_err,
 	TP_printk("pages=%u", __entry->pages)
 );
 
-TRACE_EVENT(svc_handle_xprt,
-	TP_PROTO(struct svc_xprt *xprt, int len),
-
-	TP_ARGS(xprt, len),
-
-	TP_STRUCT__entry(
-		__field(int, len)
-		__field(unsigned long, flags)
-		__string(addr, xprt->xpt_remotebuf)
-	),
-
-	TP_fast_assign(
-		__entry->len = len;
-		__entry->flags = xprt->xpt_flags;
-		__assign_str(addr, xprt->xpt_remotebuf);
-	),
-
-	TP_printk("addr=%s len=%d flags=%s", __get_str(addr),
-		__entry->len, show_svc_xprt_flags(__entry->flags))
-);
-
 TRACE_EVENT(svc_stats_latency,
 	TP_PROTO(const struct svc_rqst *rqst),
 
-- 
cgit v1.2.3


From c8064e5b4adac5e1255cf4f3b374e75b5376e7ca Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 30 Nov 2021 11:08:07 +0100
Subject: bpf: Let bpf_warn_invalid_xdp_action() report more info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In non trivial scenarios, the action id alone is not sufficient to
identify the program causing the warning. Before the previous patch,
the generated stack-trace pointed out at least the involved device
driver.

Let's additionally include the program name and id, and the relevant
device name.

If the user needs additional infos, he can fetch them via a kernel
probe, leveraging the arguments added here.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/ddb96bb975cbfddb1546cf5da60e77d5100b533c.1638189075.git.pabeni@redhat.com
---
 include/linux/filter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 68c6b5c208e7..60eec80fa1d4 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1027,7 +1027,7 @@ void xdp_do_flush(void);
  */
 #define xdp_do_flush_map xdp_do_flush
 
-void bpf_warn_invalid_xdp_action(u32 act);
+void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act);
 
 #ifdef CONFIG_INET
 struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
-- 
cgit v1.2.3


From d27a662290963a1cde26cdfdbac71a546c06e94a Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Fri, 10 Dec 2021 18:15:11 +0100
Subject: xsk: Wipe out dead zero_copy_allocator declarations

zero_copy_allocator has been removed back when Bjorn Topel introduced
xsk_buff_pool. Remove references to it that were dangling in the tree.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/20211210171511.11574-1-maciej.fijalkowski@intel.com
---
 include/net/xdp_priv.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h
index a9d5b7603b89..a2d58b1a12e1 100644
--- a/include/net/xdp_priv.h
+++ b/include/net/xdp_priv.h
@@ -10,7 +10,6 @@ struct xdp_mem_allocator {
 	union {
 		void *allocator;
 		struct page_pool *page_pool;
-		struct zero_copy_allocator *zc_alloc;
 	};
 	struct rhash_head node;
 	struct rcu_head rcu;
-- 
cgit v1.2.3


From 22c3f2f56bd9c901e1da69040680c311f310635d Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Wed, 1 Dec 2021 11:36:18 -0800
Subject: net/mlx5: Separate FDB namespace

This patch doesn't add an additional namespaces, but just separates the
naming to be used by each FDB user, bypass and kernel.
Downstream patches will actually split this up and allow to have more
than single priority for the bypass users.

Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Acked-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index cd2d4c572367..b1aad14689e3 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -73,6 +73,7 @@ enum mlx5_flow_namespace_type {
 	MLX5_FLOW_NAMESPACE_KERNEL,
 	MLX5_FLOW_NAMESPACE_LEFTOVERS,
 	MLX5_FLOW_NAMESPACE_ANCHOR,
+	MLX5_FLOW_NAMESPACE_FDB_BYPASS,
 	MLX5_FLOW_NAMESPACE_FDB,
 	MLX5_FLOW_NAMESPACE_ESW_EGRESS,
 	MLX5_FLOW_NAMESPACE_ESW_INGRESS,
-- 
cgit v1.2.3


From 057ccd9db760fc2c336b1138dab345bc2d3cdd35 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Wed, 8 Dec 2021 21:17:53 +0800
Subject: dt-bindings: power: imx8ulp: add power domain header file

Add i.MX8ULP power domain header file

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Peng Fan <peng.fan@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 include/dt-bindings/power/imx8ulp-power.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 include/dt-bindings/power/imx8ulp-power.h

(limited to 'include')

diff --git a/include/dt-bindings/power/imx8ulp-power.h b/include/dt-bindings/power/imx8ulp-power.h
new file mode 100644
index 000000000000..a556b2e96df1
--- /dev/null
+++ b/include/dt-bindings/power/imx8ulp-power.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ *  Copyright 2021 NXP
+ */
+
+#ifndef __DT_BINDINGS_IMX8ULP_POWER_H__
+#define __DT_BINDINGS_IMX8ULP_POWER_H__
+
+#define IMX8ULP_PD_DMA1		0
+#define IMX8ULP_PD_FLEXSPI2	1
+#define IMX8ULP_PD_USB0		2
+#define IMX8ULP_PD_USDHC0	3
+#define IMX8ULP_PD_USDHC1	4
+#define IMX8ULP_PD_USDHC2_USB1	5
+#define IMX8ULP_PD_DCNANO	6
+#define IMX8ULP_PD_EPDC		7
+#define IMX8ULP_PD_DMA2		8
+#define IMX8ULP_PD_GPU2D	9
+#define IMX8ULP_PD_GPU3D	10
+#define IMX8ULP_PD_HIFI4	11
+#define IMX8ULP_PD_ISI		12
+#define IMX8ULP_PD_MIPI_CSI	13
+#define IMX8ULP_PD_MIPI_DSI	14
+#define IMX8ULP_PD_PXP		15
+
+#endif
-- 
cgit v1.2.3


From 8aa45b544db9788b0fcc7a300eba8a3ade5d3d50 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Fri, 10 Dec 2021 13:11:34 +0200
Subject: HID: Add map_msc() to avoid boilerplate code

Since we are going to have more MSC events too, add map_msc() that can
be used to fill in necessary fields and avoid boilerplate code.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Tero Kristo <tero.kristo@linux.intel.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Link: https://lore.kernel.org/r/20211210111138.1248187-2-tero.kristo@linux.intel.com
---
 include/linux/hid.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index b2fea7fc54a1..f18e2cf5d74d 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -1010,6 +1010,10 @@ static inline void hid_map_usage(struct hid_input *hidinput,
 		bmap = input->ledbit;
 		limit = LED_MAX;
 		break;
+	case EV_MSC:
+		bmap = input->mscbit;
+		limit = MSC_MAX;
+		break;
 	}
 
 	if (unlikely(c > limit || !bmap)) {
-- 
cgit v1.2.3


From ae7fafa6896ae762ff489a5e71c8e5fabfeb61ee Mon Sep 17 00:00:00 2001
From: Tero Kristo <tero.kristo@linux.intel.com>
Date: Fri, 10 Dec 2021 13:11:36 +0200
Subject: HID: Add hid usages for USI style pens

Add usage codes for USI style pens, based on the USB-HID usage table:
    https://usb.org/document-library/hid-usage-tables-122

See chapter 16, Digitizers Page (0x0D)

Signed-off-by: Tero Kristo <tero.kristo@linux.intel.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Link: https://lore.kernel.org/r/20211210111138.1248187-4-tero.kristo@linux.intel.com
---
 include/linux/hid.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index f18e2cf5d74d..fa2ddda84a53 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -241,6 +241,7 @@ struct hid_item {
 #define HID_DG_TOUCH		0x000d0033
 #define HID_DG_UNTOUCH		0x000d0034
 #define HID_DG_TAP		0x000d0035
+#define HID_DG_TRANSDUCER_INDEX	0x000d0038
 #define HID_DG_TABLETFUNCTIONKEY	0x000d0039
 #define HID_DG_PROGRAMCHANGEKEY	0x000d003a
 #define HID_DG_BATTERYSTRENGTH	0x000d003b
@@ -253,6 +254,15 @@ struct hid_item {
 #define HID_DG_BARRELSWITCH	0x000d0044
 #define HID_DG_ERASER		0x000d0045
 #define HID_DG_TABLETPICK	0x000d0046
+#define HID_DG_PEN_COLOR			0x000d005c
+#define HID_DG_PEN_LINE_WIDTH			0x000d005e
+#define HID_DG_PEN_LINE_STYLE			0x000d0070
+#define HID_DG_PEN_LINE_STYLE_INK		0x000d0072
+#define HID_DG_PEN_LINE_STYLE_PENCIL		0x000d0073
+#define HID_DG_PEN_LINE_STYLE_HIGHLIGHTER	0x000d0074
+#define HID_DG_PEN_LINE_STYLE_CHISEL_MARKER	0x000d0075
+#define HID_DG_PEN_LINE_STYLE_BRUSH		0x000d0076
+#define HID_DG_PEN_LINE_STYLE_NO_PREFERENCE	0x000d0077
 
 #define HID_CP_CONSUMERCONTROL	0x000c0001
 #define HID_CP_NUMERICKEYPAD	0x000c0002
-- 
cgit v1.2.3


From 5904a3f9d756f108279f8c5b8f17ca6ddfb28d24 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Fri, 10 Dec 2021 13:11:37 +0200
Subject: HID: input: Make hidinput_find_field() static

This function is not called outside of hid-input.c so we can make it
static.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Tero Kristo <tero.kristo@linux.intel.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Link: https://lore.kernel.org/r/20211210111138.1248187-5-tero.kristo@linux.intel.com
---
 include/linux/hid.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index fa2ddda84a53..ff8b6973022a 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -899,7 +899,6 @@ extern void hidinput_disconnect(struct hid_device *);
 
 int hid_set_field(struct hid_field *, unsigned, __s32);
 int hid_input_report(struct hid_device *, int type, u8 *, u32, int);
-int hidinput_find_field(struct hid_device *hid, unsigned int type, unsigned int code, struct hid_field **field);
 struct hid_field *hidinput_get_led_field(struct hid_device *hid);
 unsigned int hidinput_count_leds(struct hid_device *hid);
 __s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code);
-- 
cgit v1.2.3


From 98046f7486db723ec8bb99a950a4fa5f5be55cd1 Mon Sep 17 00:00:00 2001
From: Jeffle Xu <jefflexu@linux.alibaba.com>
Date: Thu, 25 Nov 2021 15:05:26 +0800
Subject: fuse: support per inode DAX in fuse protocol

Expand the fuse protocol to support per inode DAX.

FUSE_HAS_INODE_DAX flag is added indicating if fuse server/client
supporting per inode DAX. It can be conveyed in both FUSE_INIT request and
reply.

FUSE_ATTR_DAX flag is added indicating if DAX shall be enabled for
corresponding file. It is conveyed in FUSE_LOOKUP reply.

Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
Reviewed-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/uapi/linux/fuse.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 3f0ea63fec08..d6ccee961891 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -193,6 +193,7 @@
  *  - add flags2 to fuse_init_in and fuse_init_out
  *  - add FUSE_SECURITY_CTX init flag
  *  - add security context to create, mkdir, symlink, and mknod requests
+ *  - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX
  */
 
 #ifndef _LINUX_FUSE_H
@@ -351,6 +352,7 @@ struct fuse_file_lock {
  * FUSE_INIT_RESERVED: reserved, do not use
  * FUSE_SECURITY_CTX:	add security context to create, mkdir, symlink, and
  *			mknod
+ * FUSE_HAS_INODE_DAX:  use per inode DAX
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -386,6 +388,7 @@ struct fuse_file_lock {
 #define FUSE_INIT_RESERVED	(1 << 31)
 /* bits 32..63 get shifted down 32 bits into the flags2 field */
 #define FUSE_SECURITY_CTX	(1ULL << 32)
+#define FUSE_HAS_INODE_DAX	(1ULL << 33)
 
 /**
  * CUSE INIT request/reply flags
@@ -468,8 +471,10 @@ struct fuse_file_lock {
  * fuse_attr flags
  *
  * FUSE_ATTR_SUBMOUNT: Object is a submount root
+ * FUSE_ATTR_DAX: Enable DAX for this file in per inode DAX mode
  */
 #define FUSE_ATTR_SUBMOUNT      (1 << 0)
+#define FUSE_ATTR_DAX		(1 << 1)
 
 /**
  * Open flags
-- 
cgit v1.2.3


From fd8d135b2c5e88662f2729e034913f183455a667 Mon Sep 17 00:00:00 2001
From: Alistair Francis <alistair@alistair23.me>
Date: Wed, 8 Dec 2021 22:40:43 +1000
Subject: HID: quirks: Allow inverting the absolute X/Y values

Add a HID_QUIRK_X_INVERT/HID_QUIRK_Y_INVERT quirk that can be used
to invert the X/Y values.

Signed-off-by: Alistair Francis <alistair@alistair23.me>
[bentiss: silence checkpatch warning]
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Link: https://lore.kernel.org/r/20211208124045.61815-2-alistair@alistair23.me
---
 include/linux/hid.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index ff8b6973022a..a14c089eb0c1 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -359,6 +359,8 @@ struct hid_item {
 /* BIT(9) reserved for backward compatibility, was NO_INIT_INPUT_REPORTS */
 #define HID_QUIRK_ALWAYS_POLL			BIT(10)
 #define HID_QUIRK_INPUT_PER_APP			BIT(11)
+#define HID_QUIRK_X_INVERT			BIT(12)
+#define HID_QUIRK_Y_INVERT			BIT(13)
 #define HID_QUIRK_SKIP_OUTPUT_REPORTS		BIT(16)
 #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID		BIT(17)
 #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP	BIT(18)
-- 
cgit v1.2.3


From 43d5ac7d07023cd133b978de473b3400edad941f Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Tue, 23 Nov 2021 11:24:04 +0000
Subject: drm: document DRM_IOCTL_MODE_GETFB2

There are a few details specific to the GETFB2 IOCTL.

It's not immediately clear how user-space should check for the
number of planes. Suggest using the handles field or the pitches
field.

The modifier array is filled with zeroes, ie. DRM_FORMAT_MOD_LINEAR.
So explicitly tell user-space to not look at it unless the flag is
set.

Changes in v2 (Daniel):
- Mention that handles should be used to compute the number of planes,
  and only refer to pitches as a fallback.
- Reword bit about undefined modifier.

Signed-off-by: Simon Ser <contact@emersion.fr>
Acked-by: Daniel Vetter <daniel@ffwll.ch>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Acked-by: Daniel Stone <daniels@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211123112400.22245-1-contact@emersion.fr
---
 include/uapi/drm/drm.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 3b810b53ba8b..642808520d92 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -1096,6 +1096,24 @@ extern "C" {
 #define DRM_IOCTL_SYNCOBJ_TRANSFER	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
 #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL	DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
 
+/**
+ * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata.
+ *
+ * This queries metadata about a framebuffer. User-space fills
+ * &drm_mode_fb_cmd2.fb_id as the input, and the kernels fills the rest of the
+ * struct as the output.
+ *
+ * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles
+ * will be filled with GEM buffer handles. Planes are valid until one has a
+ * zero handle -- this can be used to compute the number of planes.
+ *
+ * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid
+ * until one has a zero &drm_mode_fb_cmd2.pitches.
+ *
+ * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set
+ * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the
+ * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier.
+ */
 #define DRM_IOCTL_MODE_GETFB2		DRM_IOWR(0xCE, struct drm_mode_fb_cmd2)
 
 /*
-- 
cgit v1.2.3


From 369461ce8fb6c8156206c7110d7da48e9fbc41bb Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 8 Dec 2021 14:11:20 -0600
Subject: x86: perf: Move RDPMC event flag to a common definition

In preparation to enable user counter access on arm64 and to move some
of the user access handling to perf core, create a common event flag for
user counter access and convert x86 to use it.

Since the architecture specific flags start at the LSB, starting at the
MSB for common flags.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: x86@kernel.org
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: linux-perf-users@vger.kernel.org
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20211208201124.310740-2-robh@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/perf_event.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0dcfd265beed..ba9467972c09 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -129,6 +129,15 @@ struct hw_perf_event_extra {
 	int		idx;	/* index in shared_regs->regs[] */
 };
 
+/**
+ * hw_perf_event::flag values
+ *
+ * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific
+ * usage.
+ */
+#define PERF_EVENT_FLAG_ARCH			0x0000ffff
+#define PERF_EVENT_FLAG_USER_READ_CNT		0x80000000
+
 /**
  * struct hw_perf_event - performance event hardware details:
  */
-- 
cgit v1.2.3


From 82ff0c022d19c2ad69a472692bb7ee01ac07a40b Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 8 Dec 2021 14:11:21 -0600
Subject: perf: Add a counter for number of user access events in context

On arm64, user space counter access will be controlled differently
compared to x86. On x86, access in the strictest mode is enabled for all
tasks in an MM when any event is mmap'ed. For arm64, access is
explicitly requested for an event and only enabled when the event's
context is active. This avoids hooks into the arch context switch code
and gives better control of when access is enabled.

In order to configure user space access when the PMU is enabled, it is
necessary to know if any event (currently active or not) in the current
context has user space accessed enabled. Add a counter similar to other
counters in the context to avoid walking the event list every time.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20211208201124.310740-3-robh@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/perf_event.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ba9467972c09..411e34210fbf 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -831,6 +831,7 @@ struct perf_event_context {
 
 	int				nr_events;
 	int				nr_active;
+	int				nr_user;
 	int				is_active;
 	int				nr_stat;
 	int				nr_freq;
-- 
cgit v1.2.3


From 9c9211a3fc7aa41b2952765b62000443b3bb6f23 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Fri, 10 Dec 2021 16:59:58 +0800
Subject: net_tstamp: add new flag HWTSTAMP_FLAG_BONDED_PHC_INDEX

Since commit 94dd016ae538 ("bond: pass get_ts_info and SIOC[SG]HWTSTAMP
ioctl to active device") the user could get bond active interface's
PHC index directly. But when there is a failover, the bond active
interface will change, thus the PHC index is also changed. This may
break the user's program if they did not update the PHC timely.

This patch adds a new hwtstamp_config flag HWTSTAMP_FLAG_BONDED_PHC_INDEX.
When the user wants to get the bond active interface's PHC, they need to
add this flag and be aware the PHC index may be changed.

With the new flag. All flag checks in current drivers are removed. Only
the checking in net_hwtstamp_validate() is kept.

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/net_tstamp.h | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index fcc61c73a666..e258e52cfd1f 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -62,7 +62,7 @@ struct so_timestamping {
 /**
  * struct hwtstamp_config - %SIOCGHWTSTAMP and %SIOCSHWTSTAMP parameter
  *
- * @flags:	no flags defined right now, must be zero for %SIOCSHWTSTAMP
+ * @flags:	one of HWTSTAMP_FLAG_*
  * @tx_type:	one of HWTSTAMP_TX_*
  * @rx_filter:	one of HWTSTAMP_FILTER_*
  *
@@ -78,6 +78,20 @@ struct hwtstamp_config {
 	int rx_filter;
 };
 
+/* possible values for hwtstamp_config->flags */
+enum hwtstamp_flags {
+	/*
+	 * With this flag, the user could get bond active interface's
+	 * PHC index. Note this PHC index is not stable as when there
+	 * is a failover, the bond active interface will be changed, so
+	 * will be the PHC index.
+	 */
+	HWTSTAMP_FLAG_BONDED_PHC_INDEX = (1<<0),
+
+	HWTSTAMP_FLAG_LAST = HWTSTAMP_FLAG_BONDED_PHC_INDEX,
+	HWTSTAMP_FLAG_MASK = (HWTSTAMP_FLAG_LAST - 1) | HWTSTAMP_FLAG_LAST
+};
+
 /* possible values for hwtstamp_config->tx_type */
 enum hwtstamp_tx_types {
 	/*
-- 
cgit v1.2.3


From 8404b0fbc7fbd42e5c5d28cdedd450e70829c77a Mon Sep 17 00:00:00 2001
From: Qi Liu <liuqi115@huawei.com>
Date: Thu, 2 Dec 2021 16:06:33 +0800
Subject: drivers/perf: hisi: Add driver for HiSilicon PCIe PMU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PCIe PMU Root Complex Integrated End Point(RCiEP) device is supported
to sample bandwidth, latency, buffer occupation etc.

Each PMU RCiEP device monitors multiple Root Ports, and each RCiEP is
registered as a PMU in /sys/bus/event_source/devices, so users can
select target PMU, and use filter to do further sets.

Filtering options contains:
event     - select the event.
port      - select target Root Ports. Information of Root Ports are
            shown under sysfs.
bdf       - select requester_id of target EP device.
trig_len  - set trigger condition for starting event statistics.
trig_mode - set trigger mode. 0 means starting to statistic when bigger
            than trigger condition, and 1 means smaller.
thr_len   - set threshold for statistics.
thr_mode  - set threshold mode. 0 means count when bigger than threshold,
            and 1 means smaller.

Acked-by: Krzysztof Wilczyński <kw@linux.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Signed-off-by: Qi Liu <liuqi115@huawei.com>
Reviewed-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Link: https://lore.kernel.org/r/20211202080633.2919-3-liuqi115@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/cpuhotplug.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 773c83730906..411a428ace4d 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -225,6 +225,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
 	CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
 	CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
+	CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE,
 	CPUHP_AP_PERF_ARM_L2X0_ONLINE,
 	CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
 	CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
-- 
cgit v1.2.3


From c8a2a011cd04054a6577d8cf774adf9e09302876 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 14 Dec 2021 03:45:35 +0200
Subject: net: dsa: sja1105: fix broken connection with the sja1110 tagger

The driver was incorrectly converted assuming that "sja1105" is the only
tagger supported by this driver. This results in SJA1110 switches
failing to probe:

sja1105 spi1.0: Unable to connect to tag protocol "sja1110": -EPROTONOSUPPORT
sja1105: probe of spi1.2 failed with error -93

Add DSA_TAG_PROTO_SJA1110 to the list of supported taggers by the
sja1105 driver. The sja1105_tagger_data structure format is common for
the two tagging protocols.

Fixes: c79e84866d2a ("net: dsa: tag_sja1105: convert to tagger-owned data")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/sja1105.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index e9cb1ae6d742..159e43171ccc 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -70,7 +70,8 @@ struct sja1105_skb_cb {
 static inline struct sja1105_tagger_data *
 sja1105_tagger_data(struct dsa_switch *ds)
 {
-	BUG_ON(ds->dst->tag_ops->proto != DSA_TAG_PROTO_SJA1105);
+	BUG_ON(ds->dst->tag_ops->proto != DSA_TAG_PROTO_SJA1105 &&
+	       ds->dst->tag_ops->proto != DSA_TAG_PROTO_SJA1110);
 
 	return ds->tagger_data;
 }
-- 
cgit v1.2.3


From 7f2973149c22e7a6fee4c0c9fa6b8e4108e9c208 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 14 Dec 2021 03:45:36 +0200
Subject: net: dsa: make tagging protocols connect to individual switches from
 a tree

On the NXP Bluebox 3 board which uses a multi-switch setup with sja1105,
the mechanism through which the tagger connects to the switch tree is
broken, due to improper DSA code design. At the time when tag_ops->connect()
is called in dsa_port_parse_cpu(), DSA hasn't finished "touching" all
the ports, so it doesn't know how large the tree is and how many ports
it has. It has just seen the first CPU port by this time. As a result,
this function will call the tagger's ->connect method too early, and the
tagger will connect only to the first switch from the tree.

This could be perhaps addressed a bit more simply by just moving the
tag_ops->connect(dst) call a bit later (for example in dsa_tree_setup),
but there is already a design inconsistency at present: on the switch
side, the notification is on a per-switch basis, but on the tagger side,
it is on a per-tree basis. Furthermore, the persistent storage itself is
per switch (ds->tagger_data). And the tagger connect and disconnect
procedures (at least the ones that exist currently) could see a fair bit
of simplification if they didn't have to iterate through the switches of
a tree.

To fix the issue, this change transforms tag_ops->connect(dst) into
tag_ops->connect(ds) and moves it somewhere where we already iterate
over all switches of a tree. That is in dsa_switch_setup_tag_protocol(),
which is a good placement because we already have there the connection
call to the switch side of things.

As for the dsa_tree_bind_tag_proto() method (called from the code path
that changes the tag protocol), things are a bit more complicated
because we receive the tree as argument, yet when we unwind on errors,
it would be nice to not call tag_ops->disconnect(ds) where we didn't
previously call tag_ops->connect(ds). We didn't have this problem before
because the tag_ops connection operations passed the entire dst before,
and this is more fine grained now. To solve the error rewind case using
the new API, we have to create yet one more cross-chip notifier for
disconnection, and stay connected with the old tag protocol to all the
switches in the tree until we've succeeded to connect with the new one
as well. So if something fails half way, the whole tree is still
connected to the old tagger. But there may still be leaks if the tagger
fails to connect to the 2nd out of 3 switches in a tree: somebody needs
to tell the tagger to disconnect from the first switch. Nothing comes
for free, and this was previously handled privately by the tagging
protocol driver before, but now we need to emit a disconnect cross-chip
notifier for that, because DSA has to take care of the unwind path. We
assume that the tagging protocol has connected to a switch if it has set
ds->tagger_data to something, otherwise we avoid calling its
disconnection method in the error rewind path.

The rest of the changes are in the tagging protocol drivers, and have to
do with the replacement of dst with ds. The iteration is removed and the
error unwind path is simplified, as mentioned above.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 64d71968aa91..f16959444ae1 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -82,15 +82,14 @@ enum dsa_tag_protocol {
 };
 
 struct dsa_switch;
-struct dsa_switch_tree;
 
 struct dsa_device_ops {
 	struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
 	struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
 	void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto,
 			     int *offset);
-	int (*connect)(struct dsa_switch_tree *dst);
-	void (*disconnect)(struct dsa_switch_tree *dst);
+	int (*connect)(struct dsa_switch *ds);
+	void (*disconnect)(struct dsa_switch *ds);
 	unsigned int needed_headroom;
 	unsigned int needed_tailroom;
 	const char *name;
-- 
cgit v1.2.3


From 74747dda582dc7ddd8aac6841954e84b0d28e56c Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Sat, 27 Nov 2021 12:46:58 +0100
Subject: media: lirc: always send timeout reports

Without timeout reports, it is impossible to decode many protocols since
it is not known when the transmission ends. timeout reports are sent by
default, but can be turned off. There is no reason to turn them off, and
I cannot find any software which does this, so we can safely remove it.

This makes the ioctl LIRC_SET_REC_TIMEOUT_REPORTS a no-op.

Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/media/rc-core.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/media/rc-core.h b/include/media/rc-core.h
index 8c5b7978e1d9..ab9d3b7cd799 100644
--- a/include/media/rc-core.h
+++ b/include/media/rc-core.h
@@ -59,7 +59,6 @@ enum rc_filter_type {
  * @rc: rcdev for this lirc chardev
  * @carrier_low: when setting the carrier range, first the low end must be
  *	set with an ioctl and then the high end with another ioctl
- * @send_timeout_reports: report timeouts in lirc raw IR.
  * @rawir: queue for incoming raw IR
  * @scancodes: queue for incoming decoded scancodes
  * @wait_poll: poll struct for lirc device
@@ -72,7 +71,6 @@ struct lirc_fh {
 	struct list_head list;
 	struct rc_dev *rc;
 	int				carrier_low;
-	bool				send_timeout_reports;
 	DECLARE_KFIFO_PTR(rawir, unsigned int);
 	DECLARE_KFIFO_PTR(scancodes, struct lirc_scancode);
 	wait_queue_head_t		wait_poll;
-- 
cgit v1.2.3


From f6f787874aa52bbfbfd0210f519439d38fd5377f Mon Sep 17 00:00:00 2001
From: Richard Zhu <hongxing.zhu@nxp.com>
Date: Thu, 2 Dec 2021 16:02:31 +0800
Subject: dt-bindings: phy: phy-imx8-pcie: Add binding for the pad modes of
 imx8 pcie phy

Add binding for reference clock PAD modes of the i.MX8 PCIe PHY.

Signed-off-by: Richard Zhu <hongxing.zhu@nxp.com>
Tested-by: Marcel Ziswiler <marcel.ziswiler@toradex.com>
Reviewed-by: Tim Harvey <tharvey@gateworks.com>
Tested-by: Tim Harvey <tharvey@gateworks.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/1638432158-4119-2-git-send-email-hongxing.zhu@nxp.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/dt-bindings/phy/phy-imx8-pcie.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 include/dt-bindings/phy/phy-imx8-pcie.h

(limited to 'include')

diff --git a/include/dt-bindings/phy/phy-imx8-pcie.h b/include/dt-bindings/phy/phy-imx8-pcie.h
new file mode 100644
index 000000000000..8bbe2d6538d8
--- /dev/null
+++ b/include/dt-bindings/phy/phy-imx8-pcie.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */
+/*
+ * This header provides constants for i.MX8 PCIe.
+ */
+
+#ifndef _DT_BINDINGS_IMX8_PCIE_H
+#define _DT_BINDINGS_IMX8_PCIE_H
+
+/* Reference clock PAD mode */
+#define IMX8_PCIE_REFCLK_PAD_UNUSED	0
+#define IMX8_PCIE_REFCLK_PAD_INPUT	1
+#define IMX8_PCIE_REFCLK_PAD_OUTPUT	2
+
+#endif /* _DT_BINDINGS_IMX8_PCIE_H */
-- 
cgit v1.2.3


From 391137c04ec3ab3d27aa4e3081427f490655ec6f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 10 Dec 2021 13:02:01 +0100
Subject: media: dmxdev: drop unneeded <linux/kernel.h> inclusion from other
 headers

There is no evidence we need kernel.h inclusion in certain headers.
Drop unneeded <linux/kernel.h> inclusion from other headers.

Link: https://lore.kernel.org/linux-media/20211210120201.35635-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/media/dmxdev.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/media/dmxdev.h b/include/media/dmxdev.h
index baafa3b8aca4..63219a699370 100644
--- a/include/media/dmxdev.h
+++ b/include/media/dmxdev.h
@@ -21,7 +21,6 @@
 
 #include <linux/types.h>
 #include <linux/spinlock.h>
-#include <linux/kernel.h>
 #include <linux/time.h>
 #include <linux/timer.h>
 #include <linux/wait.h>
-- 
cgit v1.2.3


From b7898396f4bbe160f546d0c5e9fa17cca9a7d153 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 7 Dec 2021 11:37:42 -0600
Subject: ASoC: soc-pcm: Fix and cleanup DPCM locking

The existing locking for DPCM has several issues
a) a confusing mix of card->mutex and card->pcm_mutex.
b) a dpcm_lock spinlock added inconsistently and on paths that could
be recursively taken. The use of irqsave/irqrestore was also overkill.

The suggested model is:

1) The pcm_mutex is the top-most protection of BE links in the FE. The
pcm_mutex is applied always on either the top PCM callbacks or the
external call from DAPM, not taken in the internal functions.

2) the FE stream lock is taken in higher levels before invoking
dpcm_be_dai_trigger()

3) when adding and deleting a BE, both the pcm_mutex and FE stream
lock are taken.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
[clarification of commit message by plbossart]
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Link: https://lore.kernel.org/r/20211207173745.15850-4-pierre-louis.bossart@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 8e6dd8a257c5..5872a8864f3b 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -893,8 +893,6 @@ struct snd_soc_card {
 	struct mutex pcm_mutex;
 	enum snd_soc_pcm_subclass pcm_subclass;
 
-	spinlock_t dpcm_lock;
-
 	int (*probe)(struct snd_soc_card *card);
 	int (*late_probe)(struct snd_soc_card *card);
 	int (*remove)(struct snd_soc_card *card);
-- 
cgit v1.2.3


From 848aedfdc6ba25ad5652797db9266007773e44dd Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Tue, 7 Dec 2021 11:37:44 -0600
Subject: ASoC: soc-pcm: test refcount before triggering

On start/pause_release/resume, when more than one FE is connected to
the same BE, it's possible that the trigger is sent more than
once. This is not desirable, we only want to trigger a BE once, which
is straightforward to implement with a refcount.

For stop/pause/suspend, the problem is more complicated: the check
implemented in snd_soc_dpcm_can_be_free_stop() may fail due to a
conceptual deadlock when we trigger the BE before the FE. In this
case, the FE states have not yet changed, so there are corner cases
where the TRIGGER_STOP is never sent - the dual case of start where
multiple triggers might be sent.

This patch suggests an unconditional trigger in all cases, without
checking the FE states, using a refcount protected by the BE PCM
stream lock.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Link: https://lore.kernel.org/r/20211207173745.15850-6-pierre-louis.bossart@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dpcm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc-dpcm.h b/include/sound/soc-dpcm.h
index bc7af90099a8..75b92d883976 100644
--- a/include/sound/soc-dpcm.h
+++ b/include/sound/soc-dpcm.h
@@ -101,6 +101,8 @@ struct snd_soc_dpcm_runtime {
 	enum snd_soc_dpcm_state state;
 
 	int trigger_pending; /* trigger cmd + 1 if pending, 0 if not */
+
+	int be_start; /* refcount protected by BE stream pcm lock */
 };
 
 #define for_each_dpcm_fe(be, stream, _dpcm)				\
-- 
cgit v1.2.3


From 326db0dc00e57432b689349b4da3e86c90d5d61a Mon Sep 17 00:00:00 2001
From: Yann Dirson <ydirson@free.fr>
Date: Tue, 14 Dec 2021 00:30:30 +0100
Subject: amdgpu: fix some comment typos

Signed-off-by: Yann Dirson <ydirson@free.fr>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 26e45fc5eb1a..0b94ec7b73e7 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -80,7 +80,7 @@ extern "C" {
  *
  * %AMDGPU_GEM_DOMAIN_GTT	GPU accessible system memory, mapped into the
  * GPU's virtual address space via gart. Gart memory linearizes non-contiguous
- * pages of system memory, allows GPU access system memory in a linezrized
+ * pages of system memory, allows GPU access system memory in a linearized
  * fashion.
  *
  * %AMDGPU_GEM_DOMAIN_VRAM	Local video memory. For APUs, it is memory
-- 
cgit v1.2.3


From 0ae8c6252888d487f69b406369c3176172bb2064 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vkoul@kernel.org>
Date: Thu, 9 Dec 2021 14:18:41 +0530
Subject: dt-bindings: interconnect: Add Qualcomm SM8450 DT bindings

The Qualcomm SM8450 SoC has several bus fabrics that could be
controlled and tuned dynamically according to the bandwidth demand

Signed-off-by: Vinod Koul <vkoul@kernel.org>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20211209084842.189627-2-vkoul@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 include/dt-bindings/interconnect/qcom,sm8450.h | 171 +++++++++++++++++++++++++
 1 file changed, 171 insertions(+)
 create mode 100644 include/dt-bindings/interconnect/qcom,sm8450.h

(limited to 'include')

diff --git a/include/dt-bindings/interconnect/qcom,sm8450.h b/include/dt-bindings/interconnect/qcom,sm8450.h
new file mode 100644
index 000000000000..8f3c5e1fb4c4
--- /dev/null
+++ b/include/dt-bindings/interconnect/qcom,sm8450.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021, Linaro Limited
+ */
+
+#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_SM8450_H
+#define __DT_BINDINGS_INTERCONNECT_QCOM_SM8450_H
+
+#define MASTER_QSPI_0				0
+#define MASTER_QUP_1				1
+#define MASTER_A1NOC_CFG			2
+#define MASTER_SDCC_4				3
+#define MASTER_UFS_MEM				4
+#define MASTER_USB3_0				5
+#define SLAVE_A1NOC_SNOC			6
+#define SLAVE_SERVICE_A1NOC			7
+
+#define	MASTER_QDSS_BAM				0
+#define	MASTER_QUP_0				1
+#define	MASTER_QUP_2				2
+#define	MASTER_A2NOC_CFG			3
+#define	MASTER_CRYPTO				4
+#define	MASTER_IPA				5
+#define	MASTER_SENSORS_PROC			6
+#define	MASTER_SP				7
+#define	MASTER_QDSS_ETR				8
+#define	MASTER_QDSS_ETR_1			9
+#define	MASTER_SDCC_2				10
+#define	SLAVE_A2NOC_SNOC			11
+#define	SLAVE_SERVICE_A2NOC			12
+
+#define MASTER_QUP_CORE_0			0
+#define MASTER_QUP_CORE_1			1
+#define MASTER_QUP_CORE_2			2
+#define SLAVE_QUP_CORE_0			3
+#define SLAVE_QUP_CORE_1			4
+#define SLAVE_QUP_CORE_2			5
+
+#define	MASTER_GEM_NOC_CNOC			0
+#define	MASTER_GEM_NOC_PCIE_SNOC		1
+#define	SLAVE_AHB2PHY_SOUTH			2
+#define	SLAVE_AHB2PHY_NORTH			3
+#define	SLAVE_AOSS			        4
+#define	SLAVE_CAMERA_CFG			5
+#define	SLAVE_CLK_CTL			        6
+#define	SLAVE_CDSP_CFG			        7
+#define	SLAVE_RBCPR_CX_CFG			8
+#define	SLAVE_RBCPR_MMCX_CFG			9
+#define	SLAVE_RBCPR_MXA_CFG			10
+#define	SLAVE_RBCPR_MXC_CFG			11
+#define	SLAVE_CRYPTO_0_CFG			12
+#define	SLAVE_CX_RDPM				13
+#define	SLAVE_DISPLAY_CFG			14
+#define	SLAVE_GFX3D_CFG			        15
+#define	SLAVE_IMEM_CFG			        16
+#define	SLAVE_IPA_CFG			        17
+#define	SLAVE_IPC_ROUTER_CFG			18
+#define	SLAVE_LPASS			        19
+#define	SLAVE_CNOC_MSS			        20
+#define	SLAVE_MX_RDPM				21
+#define	SLAVE_PCIE_0_CFG			22
+#define	SLAVE_PCIE_1_CFG			23
+#define	SLAVE_PDM				24
+#define	SLAVE_PIMEM_CFG				25
+#define	SLAVE_PRNG				26
+#define	SLAVE_QDSS_CFG				27
+#define	SLAVE_QSPI_0				28
+#define	SLAVE_QUP_0				29
+#define	SLAVE_QUP_1				30
+#define	SLAVE_QUP_2				31
+#define	SLAVE_SDCC_2				32
+#define	SLAVE_SDCC_4				33
+#define	SLAVE_SPSS_CFG				34
+#define	SLAVE_TCSR				35
+#define	SLAVE_TLMM				36
+#define	SLAVE_TME_CFG				37
+#define	SLAVE_UFS_MEM_CFG			38
+#define	SLAVE_USB3_0				39
+#define	SLAVE_VENUS_CFG				40
+#define	SLAVE_VSENSE_CTRL_CFG			41
+#define	SLAVE_A1NOC_CFG				42
+#define	SLAVE_A2NOC_CFG				43
+#define	SLAVE_DDRSS_CFG				44
+#define	SLAVE_CNOC_MNOC_CFG			45
+#define	SLAVE_PCIE_ANOC_CFG			46
+#define	SLAVE_SNOC_CFG				47
+#define	SLAVE_IMEM				48
+#define	SLAVE_PIMEM				49
+#define	SLAVE_SERVICE_CNOC			50
+#define	SLAVE_PCIE_0				51
+#define	SLAVE_PCIE_1				52
+#define	SLAVE_QDSS_STM				53
+#define	SLAVE_TCU				54
+
+#define MASTER_GPU_TCU				0
+#define MASTER_SYS_TCU				1
+#define MASTER_APPSS_PROC			2
+#define MASTER_GFX3D				3
+#define MASTER_MSS_PROC				4
+#define MASTER_MNOC_HF_MEM_NOC			5
+#define MASTER_MNOC_SF_MEM_NOC			6
+#define MASTER_COMPUTE_NOC			7
+#define MASTER_ANOC_PCIE_GEM_NOC		8
+#define MASTER_SNOC_GC_MEM_NOC			9
+#define MASTER_SNOC_SF_MEM_NOC			10
+#define SLAVE_GEM_NOC_CNOC			11
+#define SLAVE_LLCC				12
+#define SLAVE_MEM_NOC_PCIE_SNOC			13
+#define MASTER_MNOC_HF_MEM_NOC_DISP		14
+#define MASTER_MNOC_SF_MEM_NOC_DISP		15
+#define MASTER_ANOC_PCIE_GEM_NOC_DISP		16
+#define SLAVE_LLCC_DISP				17
+
+#define MASTER_CNOC_LPASS_AG_NOC		0
+#define MASTER_LPASS_PROC			1
+#define SLAVE_LPASS_CORE_CFG			2
+#define SLAVE_LPASS_LPI_CFG			3
+#define SLAVE_LPASS_MPU_CFG			4
+#define SLAVE_LPASS_TOP_CFG			5
+#define SLAVE_LPASS_SNOC			6
+#define SLAVE_SERVICES_LPASS_AML_NOC		7
+#define SLAVE_SERVICE_LPASS_AG_NOC		8
+
+#define MASTER_LLCC				0
+#define SLAVE_EBI1				1
+#define MASTER_LLCC_DISP			2
+#define SLAVE_EBI1_DISP				3
+
+#define MASTER_CAMNOC_HF			0
+#define MASTER_CAMNOC_ICP			1
+#define MASTER_CAMNOC_SF			2
+#define MASTER_MDP				3
+#define MASTER_CNOC_MNOC_CFG			4
+#define MASTER_ROTATOR				5
+#define MASTER_CDSP_HCP				6
+#define MASTER_VIDEO				7
+#define MASTER_VIDEO_CV_PROC			8
+#define MASTER_VIDEO_PROC			9
+#define MASTER_VIDEO_V_PROC			10
+#define SLAVE_MNOC_HF_MEM_NOC			11
+#define SLAVE_MNOC_SF_MEM_NOC			12
+#define SLAVE_SERVICE_MNOC			13
+#define MASTER_MDP_DISP				14
+#define MASTER_ROTATOR_DISP			15
+#define SLAVE_MNOC_HF_MEM_NOC_DISP		16
+#define SLAVE_MNOC_SF_MEM_NOC_DISP		17
+
+#define MASTER_CDSP_NOC_CFG			0
+#define MASTER_CDSP_PROC			1
+#define SLAVE_CDSP_MEM_NOC			2
+#define SLAVE_SERVICE_NSP_NOC			3
+
+#define MASTER_PCIE_ANOC_CFG			0
+#define MASTER_PCIE_0				1
+#define MASTER_PCIE_1				2
+#define SLAVE_ANOC_PCIE_GEM_NOC			3
+#define SLAVE_SERVICE_PCIE_ANOC			4
+
+#define MASTER_GIC_AHB				0
+#define MASTER_A1NOC_SNOC			1
+#define MASTER_A2NOC_SNOC			2
+#define MASTER_LPASS_ANOC			3
+#define MASTER_SNOC_CFG				4
+#define MASTER_PIMEM				5
+#define MASTER_GIC				6
+#define SLAVE_SNOC_GEM_NOC_GC			7
+#define SLAVE_SNOC_GEM_NOC_SF			8
+#define SLAVE_SERVICE_SNOC			9
+
+#endif
-- 
cgit v1.2.3


From 0045e0d3f42ed7d05434bb5bc16acfc793ea4891 Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Tue, 7 Dec 2021 20:49:01 +0800
Subject: RDMA/hns: Support direct wqe of userspace

The current write wqe mechanism is to write to DDR first, and then notify
the hardware through doorbell to read the data. Direct wqe is a mechanism
to fill wqe directly into the hardware. In the case of light load, the wqe
will be filled into pcie bar space of the hardware, this will reduce one
memory access operation and therefore reduce the latency. SIMD
instructions allows cpu to write the 512 bits at one time to device
memory, thus it can be used for posting direct wqe.

Add direct wqe enable switch and address mapping.

Link: https://lore.kernel.org/r/20211207124901.42123-2-liangwenpeng@huawei.com
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/rdma/hns-abi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 42b177655560..f6fde06db4b4 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -77,10 +77,12 @@ enum hns_roce_qp_cap_flags {
 	HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0,
 	HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1,
 	HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2,
+	HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5,
 };
 
 struct hns_roce_ib_create_qp_resp {
 	__aligned_u64 cap_flags;
+	__aligned_u64 dwqe_mmap_key;
 };
 
 struct hns_roce_ib_alloc_ucontext_resp {
-- 
cgit v1.2.3


From 9280ac2e6f199cddcd746a9ba459136b8666287b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 14 Dec 2021 07:15:15 -0800
Subject: net: dev_replace_track() cleanup

Use existing helpers (netdev_tracker_free()
and netdev_tracker_alloc()) to remove ifdefery.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20211214151515.312535-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 235d5d082f1a..c06e9dc1a317 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3885,16 +3885,14 @@ static inline void dev_replace_track(struct net_device *odev,
 				     netdevice_tracker *tracker,
 				     gfp_t gfp)
 {
-#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
 	if (odev)
-		ref_tracker_free(&odev->refcnt_tracker, tracker);
-#endif
+		netdev_tracker_free(odev, tracker);
+
 	dev_hold(ndev);
 	dev_put(odev);
-#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+
 	if (ndev)
-		ref_tracker_alloc(&ndev->refcnt_tracker, tracker, gfp);
-#endif
+		netdev_tracker_alloc(ndev, tracker, gfp);
 }
 
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
-- 
cgit v1.2.3


From 6813b1928758ce64fabbb8ef157f994b7c2235fa Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Tue, 14 Dec 2021 15:16:04 -0800
Subject: mptcp: add missing documented NL params

'loc_id' and 'rem_id' are set in all events linked to subflows but those
were missing in the events description in the comments.

Fixes: b911c97c7dc7 ("mptcp: add netlink event support")
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/mptcp.h | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index c8cc46f80a16..f106a3941cdf 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -136,19 +136,21 @@ struct mptcp_info {
  * MPTCP_EVENT_REMOVED: token, rem_id
  * An address has been lost by the peer.
  *
- * MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6,
- *                              daddr4 | daddr6, sport, dport, backup,
- *                              if_idx [, error]
+ * MPTCP_EVENT_SUB_ESTABLISHED: token, family, loc_id, rem_id,
+ *                              saddr4 | saddr6, daddr4 | daddr6, sport,
+ *                              dport, backup, if_idx [, error]
  * A new subflow has been established. 'error' should not be set.
  *
- * MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6,
- *                         sport, dport, backup, if_idx [, error]
+ * MPTCP_EVENT_SUB_CLOSED: token, family, loc_id, rem_id, saddr4 | saddr6,
+ *                         daddr4 | daddr6, sport, dport, backup, if_idx
+ *                         [, error]
  * A subflow has been closed. An error (copy of sk_err) could be set if an
  * error has been detected for this subflow.
  *
- * MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6,
- *                           sport, dport, backup, if_idx [, error]
- *       The priority of a subflow has changed. 'error' should not be set.
+ * MPTCP_EVENT_SUB_PRIORITY: token, family, loc_id, rem_id, saddr4 | saddr6,
+ *                           daddr4 | daddr6, sport, dport, backup, if_idx
+ *                           [, error]
+ * The priority of a subflow has changed. 'error' should not be set.
  */
 enum mptcp_event_type {
 	MPTCP_EVENT_UNSPEC = 0,
-- 
cgit v1.2.3


From 8f8ef3860d4403d1bf0887380f9e3376be092c40 Mon Sep 17 00:00:00 2001
From: Vamsi krishna Lanka <quic_vamslank@quicinc.com>
Date: Mon, 6 Dec 2021 23:32:49 -0800
Subject: dt-bindings: clock: Add SDX65 GCC clock bindings

Add device tree bindings for global clock controller on SDX65 SOCs.

Signed-off-by: Vamsi Krishna Lanka <quic_vamslank@quicinc.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/e15509b2b7c9b600ab38c5269d4fac609c077b5b.1638861860.git.quic_vamslank@quicinc.com
---
 include/dt-bindings/clock/qcom,gcc-sdx65.h | 122 +++++++++++++++++++++++++++++
 1 file changed, 122 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,gcc-sdx65.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,gcc-sdx65.h b/include/dt-bindings/clock/qcom,gcc-sdx65.h
new file mode 100644
index 000000000000..75ecc9237d8f
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,gcc-sdx65.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2021, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_GCC_SDX65_H
+#define _DT_BINDINGS_CLK_QCOM_GCC_SDX65_H
+
+/* GCC clocks */
+#define GPLL0							0
+#define GPLL0_OUT_EVEN						1
+#define GCC_AHB_PCIE_LINK_CLK					2
+#define GCC_BLSP1_AHB_CLK					3
+#define GCC_BLSP1_QUP1_I2C_APPS_CLK				4
+#define GCC_BLSP1_QUP1_I2C_APPS_CLK_SRC				5
+#define GCC_BLSP1_QUP1_SPI_APPS_CLK				6
+#define GCC_BLSP1_QUP1_SPI_APPS_CLK_SRC				7
+#define GCC_BLSP1_QUP2_I2C_APPS_CLK				8
+#define GCC_BLSP1_QUP2_I2C_APPS_CLK_SRC				9
+#define GCC_BLSP1_QUP2_SPI_APPS_CLK				10
+#define GCC_BLSP1_QUP2_SPI_APPS_CLK_SRC				11
+#define GCC_BLSP1_QUP3_I2C_APPS_CLK				12
+#define GCC_BLSP1_QUP3_I2C_APPS_CLK_SRC				13
+#define GCC_BLSP1_QUP3_SPI_APPS_CLK				14
+#define GCC_BLSP1_QUP3_SPI_APPS_CLK_SRC				15
+#define GCC_BLSP1_QUP4_I2C_APPS_CLK				16
+#define GCC_BLSP1_QUP4_I2C_APPS_CLK_SRC				17
+#define GCC_BLSP1_QUP4_SPI_APPS_CLK				18
+#define GCC_BLSP1_QUP4_SPI_APPS_CLK_SRC				19
+#define GCC_BLSP1_SLEEP_CLK					20
+#define GCC_BLSP1_UART1_APPS_CLK				21
+#define GCC_BLSP1_UART1_APPS_CLK_SRC				22
+#define GCC_BLSP1_UART2_APPS_CLK				23
+#define GCC_BLSP1_UART2_APPS_CLK_SRC				24
+#define GCC_BLSP1_UART3_APPS_CLK				25
+#define GCC_BLSP1_UART3_APPS_CLK_SRC				26
+#define GCC_BLSP1_UART4_APPS_CLK				27
+#define GCC_BLSP1_UART4_APPS_CLK_SRC				28
+#define GCC_BOOT_ROM_AHB_CLK					29
+#define GCC_CPUSS_AHB_CLK					30
+#define GCC_CPUSS_AHB_CLK_SRC					31
+#define GCC_CPUSS_AHB_POSTDIV_CLK_SRC				32
+#define GCC_CPUSS_GNOC_CLK					33
+#define GCC_GP1_CLK						34
+#define GCC_GP1_CLK_SRC						35
+#define GCC_GP2_CLK						36
+#define GCC_GP2_CLK_SRC						37
+#define GCC_GP3_CLK						38
+#define GCC_GP3_CLK_SRC						39
+#define GCC_PCIE_0_CLKREF_EN					40
+#define GCC_PCIE_AUX_CLK					41
+#define GCC_PCIE_AUX_CLK_SRC					42
+#define GCC_PCIE_AUX_PHY_CLK_SRC				43
+#define GCC_PCIE_CFG_AHB_CLK					44
+#define GCC_PCIE_MSTR_AXI_CLK					45
+#define GCC_PCIE_PIPE_CLK					46
+#define GCC_PCIE_PIPE_CLK_SRC					47
+#define GCC_PCIE_RCHNG_PHY_CLK					48
+#define GCC_PCIE_RCHNG_PHY_CLK_SRC				49
+#define GCC_PCIE_SLEEP_CLK					50
+#define GCC_PCIE_SLV_AXI_CLK					51
+#define GCC_PCIE_SLV_Q2A_AXI_CLK				52
+#define GCC_PDM2_CLK						53
+#define GCC_PDM2_CLK_SRC					54
+#define GCC_PDM_AHB_CLK						55
+#define GCC_PDM_XO4_CLK						56
+#define GCC_RX1_USB2_CLKREF_EN					57
+#define GCC_SDCC1_AHB_CLK					58
+#define GCC_SDCC1_APPS_CLK					59
+#define GCC_SDCC1_APPS_CLK_SRC					60
+#define GCC_SPMI_FETCHER_AHB_CLK				61
+#define GCC_SPMI_FETCHER_CLK					62
+#define GCC_SPMI_FETCHER_CLK_SRC				63
+#define GCC_SYS_NOC_CPUSS_AHB_CLK				64
+#define GCC_USB30_MASTER_CLK					65
+#define GCC_USB30_MASTER_CLK_SRC				66
+#define GCC_USB30_MOCK_UTMI_CLK					67
+#define GCC_USB30_MOCK_UTMI_CLK_SRC				68
+#define GCC_USB30_MOCK_UTMI_POSTDIV_CLK_SRC			69
+#define GCC_USB30_MSTR_AXI_CLK					70
+#define GCC_USB30_SLEEP_CLK					71
+#define GCC_USB30_SLV_AHB_CLK					72
+#define GCC_USB3_PHY_AUX_CLK					73
+#define GCC_USB3_PHY_AUX_CLK_SRC				74
+#define GCC_USB3_PHY_PIPE_CLK					75
+#define GCC_USB3_PHY_PIPE_CLK_SRC				76
+#define GCC_USB3_PRIM_CLKREF_EN					77
+#define GCC_USB_PHY_CFG_AHB2PHY_CLK				78
+#define GCC_XO_DIV4_CLK						79
+#define GCC_XO_PCIE_LINK_CLK					80
+
+/* GCC resets */
+#define GCC_BLSP1_QUP1_BCR					0
+#define GCC_BLSP1_QUP2_BCR					1
+#define GCC_BLSP1_QUP3_BCR					2
+#define GCC_BLSP1_QUP4_BCR					3
+#define GCC_BLSP1_UART1_BCR					4
+#define GCC_BLSP1_UART2_BCR					5
+#define GCC_BLSP1_UART3_BCR					6
+#define GCC_BLSP1_UART4_BCR					7
+#define GCC_PCIE_BCR						8
+#define GCC_PCIE_LINK_DOWN_BCR					9
+#define GCC_PCIE_NOCSR_COM_PHY_BCR				10
+#define GCC_PCIE_PHY_BCR					11
+#define GCC_PCIE_PHY_CFG_AHB_BCR				12
+#define GCC_PCIE_PHY_COM_BCR					13
+#define GCC_PCIE_PHY_NOCSR_COM_PHY_BCR				14
+#define GCC_PDM_BCR						15
+#define GCC_QUSB2PHY_BCR					16
+#define GCC_SDCC1_BCR						17
+#define GCC_SPMI_FETCHER_BCR					18
+#define GCC_TCSR_PCIE_BCR					19
+#define GCC_USB30_BCR						20
+#define GCC_USB3_PHY_BCR					21
+#define GCC_USB3PHY_PHY_BCR					22
+#define GCC_USB_PHY_CFG_AHB2PHY_BCR				23
+
+/* GCC power domains */
+#define USB30_GDSC                                              0
+#define PCIE_GDSC                                               1
+
+#endif
-- 
cgit v1.2.3


From 72a0ca203ca7fae34fe61668906fe483b97d9039 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vkoul@kernel.org>
Date: Tue, 7 Dec 2021 17:10:02 +0530
Subject: dt-bindings: clock: Add SM8450 GCC clock bindings

Add device tree bindings for global clock controller on SM8450 SoCs.

Signed-off-by: Vinod Koul <vkoul@kernel.org>
Reviewed-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20211207114003.100693-2-vkoul@kernel.org
---
 include/dt-bindings/clock/qcom,gcc-sm8450.h | 244 ++++++++++++++++++++++++++++
 1 file changed, 244 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,gcc-sm8450.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,gcc-sm8450.h b/include/dt-bindings/clock/qcom,gcc-sm8450.h
new file mode 100644
index 000000000000..cf1469312c4c
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,gcc-sm8450.h
@@ -0,0 +1,244 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021, Linaro Limited
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_GCC_SM8450_H
+#define _DT_BINDINGS_CLK_QCOM_GCC_SM8450_H
+
+/* GCC HW clocks */
+#define CORE_BI_PLL_TEST_SE					0
+#define PCIE_0_PIPE_CLK						1
+#define PCIE_1_PHY_AUX_CLK					2
+#define PCIE_1_PIPE_CLK						3
+#define UFS_PHY_RX_SYMBOL_0_CLK					4
+#define UFS_PHY_RX_SYMBOL_1_CLK					5
+#define UFS_PHY_TX_SYMBOL_0_CLK					6
+#define USB3_PHY_WRAPPER_GCC_USB30_PIPE_CLK			7
+
+/* GCC clocks */
+#define GCC_AGGRE_NOC_PCIE_0_AXI_CLK				8
+#define GCC_AGGRE_NOC_PCIE_1_AXI_CLK				9
+#define GCC_AGGRE_UFS_PHY_AXI_CLK				10
+#define GCC_AGGRE_UFS_PHY_AXI_HW_CTL_CLK			11
+#define GCC_AGGRE_USB3_PRIM_AXI_CLK				12
+#define GCC_ANOC_PCIE_PWRCTL_CLK				13
+#define GCC_BOOT_ROM_AHB_CLK					14
+#define GCC_CAMERA_AHB_CLK					15
+#define GCC_CAMERA_HF_AXI_CLK					16
+#define GCC_CAMERA_SF_AXI_CLK					17
+#define GCC_CAMERA_XO_CLK					18
+#define GCC_CFG_NOC_PCIE_ANOC_AHB_CLK				19
+#define GCC_CFG_NOC_USB3_PRIM_AXI_CLK				20
+#define GCC_CPUSS_AHB_CLK					21
+#define GCC_CPUSS_AHB_CLK_SRC					22
+#define GCC_CPUSS_AHB_POSTDIV_CLK_SRC				23
+#define GCC_CPUSS_CONFIG_NOC_SF_CLK				24
+#define GCC_DDRSS_GPU_AXI_CLK					25
+#define GCC_DDRSS_PCIE_SF_TBU_CLK				26
+#define GCC_DISP_AHB_CLK					27
+#define GCC_DISP_HF_AXI_CLK					28
+#define GCC_DISP_SF_AXI_CLK					29
+#define GCC_DISP_XO_CLK						30
+#define GCC_EUSB3_0_CLKREF_EN					31
+#define GCC_GP1_CLK						32
+#define GCC_GP1_CLK_SRC						33
+#define GCC_GP2_CLK						34
+#define GCC_GP2_CLK_SRC						35
+#define GCC_GP3_CLK						36
+#define GCC_GP3_CLK_SRC						37
+#define GCC_GPLL0						38
+#define GCC_GPLL0_OUT_EVEN					39
+#define GCC_GPLL4						40
+#define GCC_GPLL9						41
+#define GCC_GPU_CFG_AHB_CLK					42
+#define GCC_GPU_GPLL0_CLK_SRC					43
+#define GCC_GPU_GPLL0_DIV_CLK_SRC				44
+#define GCC_GPU_MEMNOC_GFX_CLK					45
+#define GCC_GPU_SNOC_DVM_GFX_CLK				46
+#define GCC_PCIE_0_AUX_CLK					47
+#define GCC_PCIE_0_AUX_CLK_SRC					48
+#define GCC_PCIE_0_CFG_AHB_CLK					49
+#define GCC_PCIE_0_CLKREF_EN					50
+#define GCC_PCIE_0_MSTR_AXI_CLK					51
+#define GCC_PCIE_0_PHY_RCHNG_CLK				52
+#define GCC_PCIE_0_PHY_RCHNG_CLK_SRC				53
+#define GCC_PCIE_0_PIPE_CLK					54
+#define GCC_PCIE_0_PIPE_CLK_SRC					55
+#define GCC_PCIE_0_SLV_AXI_CLK					56
+#define GCC_PCIE_0_SLV_Q2A_AXI_CLK				57
+#define GCC_PCIE_1_AUX_CLK					58
+#define GCC_PCIE_1_AUX_CLK_SRC					59
+#define GCC_PCIE_1_CFG_AHB_CLK					60
+#define GCC_PCIE_1_CLKREF_EN					61
+#define GCC_PCIE_1_MSTR_AXI_CLK					62
+#define GCC_PCIE_1_PHY_AUX_CLK					63
+#define GCC_PCIE_1_PHY_AUX_CLK_SRC				64
+#define GCC_PCIE_1_PHY_RCHNG_CLK				65
+#define GCC_PCIE_1_PHY_RCHNG_CLK_SRC				66
+#define GCC_PCIE_1_PIPE_CLK					67
+#define GCC_PCIE_1_PIPE_CLK_SRC					68
+#define GCC_PCIE_1_SLV_AXI_CLK					69
+#define GCC_PCIE_1_SLV_Q2A_AXI_CLK				70
+#define GCC_PDM2_CLK						71
+#define GCC_PDM2_CLK_SRC					72
+#define GCC_PDM_AHB_CLK						73
+#define GCC_PDM_XO4_CLK						74
+#define GCC_QMIP_CAMERA_NRT_AHB_CLK				75
+#define GCC_QMIP_CAMERA_RT_AHB_CLK				76
+#define GCC_QMIP_DISP_AHB_CLK					77
+#define GCC_QMIP_GPU_AHB_CLK					78
+#define GCC_QMIP_PCIE_AHB_CLK					79
+#define GCC_QMIP_VIDEO_CV_CPU_AHB_CLK				80
+#define GCC_QMIP_VIDEO_CVP_AHB_CLK				81
+#define GCC_QMIP_VIDEO_V_CPU_AHB_CLK				82
+#define GCC_QMIP_VIDEO_VCODEC_AHB_CLK				83
+#define GCC_QUPV3_WRAP0_CORE_2X_CLK				84
+#define GCC_QUPV3_WRAP0_CORE_CLK				85
+#define GCC_QUPV3_WRAP0_S0_CLK					86
+#define GCC_QUPV3_WRAP0_S0_CLK_SRC				87
+#define GCC_QUPV3_WRAP0_S1_CLK					88
+#define GCC_QUPV3_WRAP0_S1_CLK_SRC				89
+#define GCC_QUPV3_WRAP0_S2_CLK					90
+#define GCC_QUPV3_WRAP0_S2_CLK_SRC				91
+#define GCC_QUPV3_WRAP0_S3_CLK					92
+#define GCC_QUPV3_WRAP0_S3_CLK_SRC				93
+#define GCC_QUPV3_WRAP0_S4_CLK					94
+#define GCC_QUPV3_WRAP0_S4_CLK_SRC				95
+#define GCC_QUPV3_WRAP0_S5_CLK					96
+#define GCC_QUPV3_WRAP0_S5_CLK_SRC				97
+#define GCC_QUPV3_WRAP0_S6_CLK					98
+#define GCC_QUPV3_WRAP0_S6_CLK_SRC				99
+#define GCC_QUPV3_WRAP0_S7_CLK					100
+#define GCC_QUPV3_WRAP0_S7_CLK_SRC				101
+#define GCC_QUPV3_WRAP1_CORE_2X_CLK				102
+#define GCC_QUPV3_WRAP1_CORE_CLK				103
+#define GCC_QUPV3_WRAP1_S0_CLK					104
+#define GCC_QUPV3_WRAP1_S0_CLK_SRC				105
+#define GCC_QUPV3_WRAP1_S1_CLK					106
+#define GCC_QUPV3_WRAP1_S1_CLK_SRC				107
+#define GCC_QUPV3_WRAP1_S2_CLK					108
+#define GCC_QUPV3_WRAP1_S2_CLK_SRC				109
+#define GCC_QUPV3_WRAP1_S3_CLK					110
+#define GCC_QUPV3_WRAP1_S3_CLK_SRC				111
+#define GCC_QUPV3_WRAP1_S4_CLK					112
+#define GCC_QUPV3_WRAP1_S4_CLK_SRC				113
+#define GCC_QUPV3_WRAP1_S5_CLK					114
+#define GCC_QUPV3_WRAP1_S5_CLK_SRC				115
+#define GCC_QUPV3_WRAP1_S6_CLK					116
+#define GCC_QUPV3_WRAP1_S6_CLK_SRC				117
+#define GCC_QUPV3_WRAP2_CORE_2X_CLK				118
+#define GCC_QUPV3_WRAP2_CORE_CLK				119
+#define GCC_QUPV3_WRAP2_S0_CLK					120
+#define GCC_QUPV3_WRAP2_S0_CLK_SRC				121
+#define GCC_QUPV3_WRAP2_S1_CLK					122
+#define GCC_QUPV3_WRAP2_S1_CLK_SRC				123
+#define GCC_QUPV3_WRAP2_S2_CLK					124
+#define GCC_QUPV3_WRAP2_S2_CLK_SRC				125
+#define GCC_QUPV3_WRAP2_S3_CLK					126
+#define GCC_QUPV3_WRAP2_S3_CLK_SRC				127
+#define GCC_QUPV3_WRAP2_S4_CLK					128
+#define GCC_QUPV3_WRAP2_S4_CLK_SRC				129
+#define GCC_QUPV3_WRAP2_S5_CLK					130
+#define GCC_QUPV3_WRAP2_S5_CLK_SRC				131
+#define GCC_QUPV3_WRAP2_S6_CLK					132
+#define GCC_QUPV3_WRAP2_S6_CLK_SRC				133
+#define GCC_QUPV3_WRAP_0_M_AHB_CLK				134
+#define GCC_QUPV3_WRAP_0_S_AHB_CLK				135
+#define GCC_QUPV3_WRAP_1_M_AHB_CLK				136
+#define GCC_QUPV3_WRAP_1_S_AHB_CLK				137
+#define GCC_QUPV3_WRAP_2_M_AHB_CLK				138
+#define GCC_QUPV3_WRAP_2_S_AHB_CLK				139
+#define GCC_SDCC2_AHB_CLK					140
+#define GCC_SDCC2_APPS_CLK					141
+#define GCC_SDCC2_APPS_CLK_SRC					142
+#define GCC_SDCC2_AT_CLK					143
+#define GCC_SDCC4_AHB_CLK					144
+#define GCC_SDCC4_APPS_CLK					145
+#define GCC_SDCC4_APPS_CLK_SRC					146
+#define GCC_SDCC4_AT_CLK					147
+#define GCC_SYS_NOC_CPUSS_AHB_CLK				148
+#define GCC_UFS_0_CLKREF_EN					149
+#define GCC_UFS_PHY_AHB_CLK					150
+#define GCC_UFS_PHY_AXI_CLK					151
+#define GCC_UFS_PHY_AXI_CLK_SRC					152
+#define GCC_UFS_PHY_AXI_HW_CTL_CLK				153
+#define GCC_UFS_PHY_ICE_CORE_CLK				154
+#define GCC_UFS_PHY_ICE_CORE_CLK_SRC				155
+#define GCC_UFS_PHY_ICE_CORE_HW_CTL_CLK				156
+#define GCC_UFS_PHY_PHY_AUX_CLK					157
+#define GCC_UFS_PHY_PHY_AUX_CLK_SRC				158
+#define GCC_UFS_PHY_PHY_AUX_HW_CTL_CLK				159
+#define GCC_UFS_PHY_RX_SYMBOL_0_CLK				160
+#define GCC_UFS_PHY_RX_SYMBOL_0_CLK_SRC				161
+#define GCC_UFS_PHY_RX_SYMBOL_1_CLK				162
+#define GCC_UFS_PHY_RX_SYMBOL_1_CLK_SRC				163
+#define GCC_UFS_PHY_TX_SYMBOL_0_CLK				164
+#define GCC_UFS_PHY_TX_SYMBOL_0_CLK_SRC				165
+#define GCC_UFS_PHY_UNIPRO_CORE_CLK				166
+#define GCC_UFS_PHY_UNIPRO_CORE_CLK_SRC				167
+#define GCC_UFS_PHY_UNIPRO_CORE_HW_CTL_CLK			168
+#define GCC_USB30_PRIM_MASTER_CLK				169
+#define GCC_USB30_PRIM_MASTER_CLK_SRC				170
+#define GCC_USB30_PRIM_MOCK_UTMI_CLK				171
+#define GCC_USB30_PRIM_MOCK_UTMI_CLK_SRC			172
+#define GCC_USB30_PRIM_MOCK_UTMI_POSTDIV_CLK_SRC		173
+#define GCC_USB30_PRIM_SLEEP_CLK				174
+#define GCC_USB3_0_CLKREF_EN					175
+#define GCC_USB3_PRIM_PHY_AUX_CLK				176
+#define GCC_USB3_PRIM_PHY_AUX_CLK_SRC				177
+#define GCC_USB3_PRIM_PHY_COM_AUX_CLK				178
+#define GCC_USB3_PRIM_PHY_PIPE_CLK				179
+#define GCC_USB3_PRIM_PHY_PIPE_CLK_SRC				180
+#define GCC_VIDEO_AHB_CLK					181
+#define GCC_VIDEO_AXI0_CLK					182
+#define GCC_VIDEO_AXI1_CLK					183
+#define GCC_VIDEO_XO_CLK					184
+
+/* GCC resets */
+#define GCC_CAMERA_BCR						0
+#define GCC_DISPLAY_BCR						1
+#define GCC_GPU_BCR						2
+#define GCC_MMSS_BCR						3
+#define GCC_PCIE_0_BCR						4
+#define GCC_PCIE_0_LINK_DOWN_BCR				5
+#define GCC_PCIE_0_NOCSR_COM_PHY_BCR				6
+#define GCC_PCIE_0_PHY_BCR					7
+#define GCC_PCIE_0_PHY_NOCSR_COM_PHY_BCR			8
+#define GCC_PCIE_1_BCR						9
+#define GCC_PCIE_1_LINK_DOWN_BCR				10
+#define GCC_PCIE_1_NOCSR_COM_PHY_BCR				11
+#define GCC_PCIE_1_PHY_BCR					12
+#define GCC_PCIE_1_PHY_NOCSR_COM_PHY_BCR			13
+#define GCC_PCIE_PHY_BCR					14
+#define GCC_PCIE_PHY_CFG_AHB_BCR				15
+#define GCC_PCIE_PHY_COM_BCR					16
+#define GCC_PDM_BCR						17
+#define GCC_QUPV3_WRAPPER_0_BCR					18
+#define GCC_QUPV3_WRAPPER_1_BCR					19
+#define GCC_QUPV3_WRAPPER_2_BCR					20
+#define GCC_QUSB2PHY_PRIM_BCR					21
+#define GCC_QUSB2PHY_SEC_BCR					22
+#define GCC_SDCC2_BCR						23
+#define GCC_SDCC4_BCR						24
+#define GCC_UFS_PHY_BCR						25
+#define GCC_USB30_PRIM_BCR					26
+#define GCC_USB3_DP_PHY_PRIM_BCR				27
+#define GCC_USB3_DP_PHY_SEC_BCR					28
+#define GCC_USB3_PHY_PRIM_BCR					29
+#define GCC_USB3_PHY_SEC_BCR					30
+#define GCC_USB3PHY_PHY_PRIM_BCR				31
+#define GCC_USB3PHY_PHY_SEC_BCR					32
+#define GCC_USB_PHY_CFG_AHB2PHY_BCR				33
+#define GCC_VIDEO_AXI0_CLK_ARES					34
+#define GCC_VIDEO_AXI1_CLK_ARES					35
+#define GCC_VIDEO_BCR						36
+
+/* GCC power domains */
+#define PCIE_0_GDSC						0
+#define PCIE_1_GDSC						1
+#define UFS_PHY_GDSC						2
+#define USB30_PRIM_GDSC						3
+
+#endif
-- 
cgit v1.2.3


From 061dbde2bf3b12d80a4efd4b40db0b272e55b7f5 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Wed, 15 Dec 2021 08:23:23 +0800
Subject: dt-bindings: interconnect: Add Qualcomm QCM2290 NoC support

Add bindings for Qualcomm QCM2290 Network-On-Chip interconnect devices.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20211215002324.1727-5-shawn.guo@linaro.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 include/dt-bindings/interconnect/qcom,qcm2290.h | 94 +++++++++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100644 include/dt-bindings/interconnect/qcom,qcm2290.h

(limited to 'include')

diff --git a/include/dt-bindings/interconnect/qcom,qcm2290.h b/include/dt-bindings/interconnect/qcom,qcm2290.h
new file mode 100644
index 000000000000..6cbbb7fe0bd3
--- /dev/null
+++ b/include/dt-bindings/interconnect/qcom,qcm2290.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* QCM2290 interconnect IDs */
+
+#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_QCM2290_H
+#define __DT_BINDINGS_INTERCONNECT_QCOM_QCM2290_H
+
+/* BIMC */
+#define MASTER_APPSS_PROC		0
+#define MASTER_SNOC_BIMC_RT		1
+#define MASTER_SNOC_BIMC_NRT		2
+#define MASTER_SNOC_BIMC		3
+#define MASTER_TCU_0			4
+#define MASTER_GFX3D			5
+#define SLAVE_EBI1			6
+#define SLAVE_BIMC_SNOC			7
+
+/* CNOC */
+#define MASTER_SNOC_CNOC		0
+#define MASTER_QDSS_DAP			1
+#define SLAVE_BIMC_CFG			2
+#define SLAVE_CAMERA_NRT_THROTTLE_CFG	3
+#define SLAVE_CAMERA_RT_THROTTLE_CFG	4
+#define SLAVE_CAMERA_CFG		5
+#define SLAVE_CLK_CTL			6
+#define SLAVE_CRYPTO_0_CFG		7
+#define SLAVE_DISPLAY_CFG		8
+#define SLAVE_DISPLAY_THROTTLE_CFG	9
+#define SLAVE_GPU_CFG			10
+#define SLAVE_HWKM			11
+#define SLAVE_IMEM_CFG			12
+#define SLAVE_IPA_CFG			13
+#define SLAVE_LPASS			14
+#define SLAVE_MESSAGE_RAM		15
+#define SLAVE_PDM			16
+#define SLAVE_PIMEM_CFG			17
+#define SLAVE_PKA_WRAPPER		18
+#define SLAVE_PMIC_ARB			19
+#define SLAVE_PRNG			20
+#define SLAVE_QDSS_CFG			21
+#define SLAVE_QM_CFG			22
+#define SLAVE_QM_MPU_CFG		23
+#define SLAVE_QPIC			24
+#define SLAVE_QUP_0			25
+#define SLAVE_SDCC_1			26
+#define SLAVE_SDCC_2			27
+#define SLAVE_SNOC_CFG			28
+#define SLAVE_TCSR			29
+#define SLAVE_USB3			30
+#define SLAVE_VENUS_CFG			31
+#define SLAVE_VENUS_THROTTLE_CFG	32
+#define SLAVE_VSENSE_CTRL_CFG		33
+#define SLAVE_SERVICE_CNOC		34
+
+/* SNOC */
+#define MASTER_CRYPTO_CORE0		0
+#define MASTER_SNOC_CFG			1
+#define MASTER_TIC			2
+#define MASTER_ANOC_SNOC		3
+#define MASTER_BIMC_SNOC		4
+#define MASTER_PIMEM			5
+#define MASTER_QDSS_BAM			6
+#define MASTER_QUP_0			7
+#define MASTER_IPA			8
+#define MASTER_QDSS_ETR			9
+#define MASTER_SDCC_1			10
+#define MASTER_SDCC_2			11
+#define MASTER_QPIC			12
+#define MASTER_USB3_0			13
+#define SLAVE_APPSS			14
+#define SLAVE_SNOC_CNOC			15
+#define SLAVE_IMEM			16
+#define SLAVE_PIMEM			17
+#define SLAVE_SNOC_BIMC			18
+#define SLAVE_SERVICE_SNOC		19
+#define SLAVE_QDSS_STM			20
+#define SLAVE_TCU			21
+#define SLAVE_ANOC_SNOC			22
+
+/* QUP Virtual */
+#define MASTER_QUP_CORE_0		0
+#define SLAVE_QUP_CORE_0		1
+
+/* MMNRT Virtual */
+#define MASTER_CAMNOC_SF		0
+#define MASTER_VIDEO_P0			1
+#define MASTER_VIDEO_PROC		2
+#define SLAVE_SNOC_BIMC_NRT		3
+
+/* MMRT Virtual */
+#define MASTER_CAMNOC_HF		0
+#define MASTER_MDP0			1
+#define SLAVE_SNOC_BIMC_RT		2
+
+#endif
-- 
cgit v1.2.3


From ad69cd9972e79aba103ba5365de0acd35770c265 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 29 Nov 2021 22:15:27 +0200
Subject: fsnotify: clarify object type argument

In preparation for separating object type from iterator type, rename
some 'type' arguments in functions to 'obj_type' and remove the unused
interface to clear marks by object type mask.

Link: https://lore.kernel.org/r/20211129201537.1932819-2-amir73il@gmail.com
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fsnotify_backend.h | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 51ef2b079bfa..b9c84b1dbcc8 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -338,6 +338,7 @@ static inline struct fs_error_report *fsnotify_data_error_report(
 }
 
 enum fsnotify_obj_type {
+	FSNOTIFY_OBJ_TYPE_ANY = -1,
 	FSNOTIFY_OBJ_TYPE_INODE,
 	FSNOTIFY_OBJ_TYPE_PARENT,
 	FSNOTIFY_OBJ_TYPE_VFSMOUNT,
@@ -346,15 +347,9 @@ enum fsnotify_obj_type {
 	FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT
 };
 
-#define FSNOTIFY_OBJ_TYPE_INODE_FL	(1U << FSNOTIFY_OBJ_TYPE_INODE)
-#define FSNOTIFY_OBJ_TYPE_PARENT_FL	(1U << FSNOTIFY_OBJ_TYPE_PARENT)
-#define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL	(1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT)
-#define FSNOTIFY_OBJ_TYPE_SB_FL		(1U << FSNOTIFY_OBJ_TYPE_SB)
-#define FSNOTIFY_OBJ_ALL_TYPES_MASK	((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1)
-
-static inline bool fsnotify_valid_obj_type(unsigned int type)
+static inline bool fsnotify_valid_obj_type(unsigned int obj_type)
 {
-	return (type < FSNOTIFY_OBJ_TYPE_COUNT);
+	return (obj_type < FSNOTIFY_OBJ_TYPE_COUNT);
 }
 
 struct fsnotify_iter_info {
@@ -387,7 +382,7 @@ static inline void fsnotify_iter_set_report_type_mark(
 static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \
 		struct fsnotify_iter_info *iter_info) \
 { \
-	return (iter_info->report_mask & FSNOTIFY_OBJ_TYPE_##NAME##_FL) ? \
+	return (iter_info->report_mask & (1U << FSNOTIFY_OBJ_TYPE_##NAME)) ? \
 		iter_info->marks[FSNOTIFY_OBJ_TYPE_##NAME] : NULL; \
 }
 
@@ -604,11 +599,11 @@ extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn,
 				  __kernel_fsid_t *fsid);
 /* attach the mark to the object */
 extern int fsnotify_add_mark(struct fsnotify_mark *mark,
-			     fsnotify_connp_t *connp, unsigned int type,
+			     fsnotify_connp_t *connp, unsigned int obj_type,
 			     int allow_dups, __kernel_fsid_t *fsid);
 extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
 				    fsnotify_connp_t *connp,
-				    unsigned int type, int allow_dups,
+				    unsigned int obj_type, int allow_dups,
 				    __kernel_fsid_t *fsid);
 
 /* attach the mark to the inode */
@@ -637,22 +632,23 @@ extern void fsnotify_detach_mark(struct fsnotify_mark *mark);
 extern void fsnotify_free_mark(struct fsnotify_mark *mark);
 /* Wait until all marks queued for destruction are destroyed */
 extern void fsnotify_wait_marks_destroyed(void);
-/* run all the marks in a group, and clear all of the marks attached to given object type */
-extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type);
+/* Clear all of the marks of a group attached to a given object type */
+extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
+					  unsigned int obj_type);
 /* run all the marks in a group, and clear all of the vfsmount marks */
 static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group)
 {
-	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL);
+	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT);
 }
 /* run all the marks in a group, and clear all of the inode marks */
 static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group)
 {
-	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE_FL);
+	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE);
 }
 /* run all the marks in a group, and clear all of the sn marks */
 static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group)
 {
-	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB_FL);
+	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB);
 }
 extern void fsnotify_get_mark(struct fsnotify_mark *mark);
 extern void fsnotify_put_mark(struct fsnotify_mark *mark);
-- 
cgit v1.2.3


From 1c9007d62bea6fd164285314f7553f73e5308863 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 29 Nov 2021 22:15:28 +0200
Subject: fsnotify: separate mark iterator type from object type enum

They are two different types that use the same enum, so this confusing.

Use the object type to indicate the type of object mark is attached to
and the iter type to indicate the type of watch.

A group can have two different watches of the same object type (parent
and child watches) that match the same event.

Link: https://lore.kernel.org/r/20211129201537.1932819-3-amir73il@gmail.com
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fsnotify_backend.h | 41 +++++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index b9c84b1dbcc8..73739fee1710 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -337,10 +337,25 @@ static inline struct fs_error_report *fsnotify_data_error_report(
 	}
 }
 
+/*
+ * Index to merged marks iterator array that correlates to a type of watch.
+ * The type of watched object can be deduced from the iterator type, but not
+ * the other way around, because an event can match different watched objects
+ * of the same object type.
+ * For example, both parent and child are watching an object of type inode.
+ */
+enum fsnotify_iter_type {
+	FSNOTIFY_ITER_TYPE_INODE,
+	FSNOTIFY_ITER_TYPE_VFSMOUNT,
+	FSNOTIFY_ITER_TYPE_SB,
+	FSNOTIFY_ITER_TYPE_PARENT,
+	FSNOTIFY_ITER_TYPE_COUNT
+};
+
+/* The type of object that a mark is attached to */
 enum fsnotify_obj_type {
 	FSNOTIFY_OBJ_TYPE_ANY = -1,
 	FSNOTIFY_OBJ_TYPE_INODE,
-	FSNOTIFY_OBJ_TYPE_PARENT,
 	FSNOTIFY_OBJ_TYPE_VFSMOUNT,
 	FSNOTIFY_OBJ_TYPE_SB,
 	FSNOTIFY_OBJ_TYPE_COUNT,
@@ -353,37 +368,37 @@ static inline bool fsnotify_valid_obj_type(unsigned int obj_type)
 }
 
 struct fsnotify_iter_info {
-	struct fsnotify_mark *marks[FSNOTIFY_OBJ_TYPE_COUNT];
+	struct fsnotify_mark *marks[FSNOTIFY_ITER_TYPE_COUNT];
 	unsigned int report_mask;
 	int srcu_idx;
 };
 
 static inline bool fsnotify_iter_should_report_type(
-		struct fsnotify_iter_info *iter_info, int type)
+		struct fsnotify_iter_info *iter_info, int iter_type)
 {
-	return (iter_info->report_mask & (1U << type));
+	return (iter_info->report_mask & (1U << iter_type));
 }
 
 static inline void fsnotify_iter_set_report_type(
-		struct fsnotify_iter_info *iter_info, int type)
+		struct fsnotify_iter_info *iter_info, int iter_type)
 {
-	iter_info->report_mask |= (1U << type);
+	iter_info->report_mask |= (1U << iter_type);
 }
 
 static inline void fsnotify_iter_set_report_type_mark(
-		struct fsnotify_iter_info *iter_info, int type,
+		struct fsnotify_iter_info *iter_info, int iter_type,
 		struct fsnotify_mark *mark)
 {
-	iter_info->marks[type] = mark;
-	iter_info->report_mask |= (1U << type);
+	iter_info->marks[iter_type] = mark;
+	iter_info->report_mask |= (1U << iter_type);
 }
 
 #define FSNOTIFY_ITER_FUNCS(name, NAME) \
 static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \
 		struct fsnotify_iter_info *iter_info) \
 { \
-	return (iter_info->report_mask & (1U << FSNOTIFY_OBJ_TYPE_##NAME)) ? \
-		iter_info->marks[FSNOTIFY_OBJ_TYPE_##NAME] : NULL; \
+	return (iter_info->report_mask & (1U << FSNOTIFY_ITER_TYPE_##NAME)) ? \
+		iter_info->marks[FSNOTIFY_ITER_TYPE_##NAME] : NULL; \
 }
 
 FSNOTIFY_ITER_FUNCS(inode, INODE)
@@ -391,8 +406,8 @@ FSNOTIFY_ITER_FUNCS(parent, PARENT)
 FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT)
 FSNOTIFY_ITER_FUNCS(sb, SB)
 
-#define fsnotify_foreach_obj_type(type) \
-	for (type = 0; type < FSNOTIFY_OBJ_TYPE_COUNT; type++)
+#define fsnotify_foreach_iter_type(type) \
+	for (type = 0; type < FSNOTIFY_ITER_TYPE_COUNT; type++)
 
 /*
  * fsnotify_connp_t is what we embed in objects which connector can be attached
-- 
cgit v1.2.3


From d61fd650e9d206a71fda789f02a1ced4b19944c4 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 29 Nov 2021 22:15:29 +0200
Subject: fanotify: introduce group flag FAN_REPORT_TARGET_FID

FAN_REPORT_FID is ambiguous in that it reports the fid of the child for
some events and the fid of the parent for create/delete/move events.

The new FAN_REPORT_TARGET_FID flag is an implicit request to report
the fid of the target object of the operation (a.k.a the child inode)
also in create/delete/move events in addition to the fid of the parent
and the name of the child.

To reduce the test matrix for uninteresting use cases, the new
FAN_REPORT_TARGET_FID flag requires both FAN_REPORT_NAME and
FAN_REPORT_FID.  The convenience macro FAN_REPORT_DFID_NAME_TARGET
combines FAN_REPORT_TARGET_FID with all the required flags.

Link: https://lore.kernel.org/r/20211129201537.1932819-4-amir73il@gmail.com
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fanotify.h      | 2 +-
 include/uapi/linux/fanotify.h | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 616af2ea20f3..376e050e6f38 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -25,7 +25,7 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
 
 #define FANOTIFY_CLASS_BITS	(FAN_CLASS_NOTIF | FANOTIFY_PERM_CLASSES)
 
-#define FANOTIFY_FID_BITS	(FAN_REPORT_FID | FAN_REPORT_DFID_NAME)
+#define FANOTIFY_FID_BITS	(FAN_REPORT_DFID_NAME_TARGET)
 
 #define FANOTIFY_INFO_MODES	(FANOTIFY_FID_BITS | FAN_REPORT_PIDFD)
 
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index bd1932c2074d..60f73639a896 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -57,9 +57,13 @@
 #define FAN_REPORT_FID		0x00000200	/* Report unique file id */
 #define FAN_REPORT_DIR_FID	0x00000400	/* Report unique directory id */
 #define FAN_REPORT_NAME		0x00000800	/* Report events with name */
+#define FAN_REPORT_TARGET_FID	0x00001000	/* Report dirent target id  */
 
 /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */
 #define FAN_REPORT_DFID_NAME	(FAN_REPORT_DIR_FID | FAN_REPORT_NAME)
+/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */
+#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \
+				     FAN_REPORT_FID | FAN_REPORT_TARGET_FID)
 
 /* Deprecated - do not use this in programs and do not add new flags here! */
 #define FAN_ALL_INIT_FLAGS	(FAN_CLOEXEC | FAN_NONBLOCK | \
-- 
cgit v1.2.3


From e54183fa7047c15819bc155f4c58501d9a9a3489 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 29 Nov 2021 22:15:30 +0200
Subject: fsnotify: generate FS_RENAME event with rich information

The dnotify FS_DN_RENAME event is used to request notification about
a move within the same parent directory and was always coupled with
the FS_MOVED_FROM event.

Rename the FS_DN_RENAME event flag to FS_RENAME, decouple it from
FS_MOVED_FROM and report it with the moved dentry instead of the moved
inode, so it has the information about both old and new parent and name.

Generate the FS_RENAME event regardless of same parent dir and apply
the "same parent" rule in the generic fsnotify_handle_event() helper
that is used to call backends with ->handle_inode_event() method
(i.e. dnotify).  The ->handle_inode_event() method is not rich enough to
report both old and new parent and name anyway.

The enriched event is reported to fanotify over the ->handle_event()
method with the old and new dir inode marks in marks array slots for
ITER_TYPE_INODE and a new iter type slot ITER_TYPE_INODE2.

The enriched event will be used for reporting old and new parent+name to
fanotify groups with FAN_RENAME events.

Link: https://lore.kernel.org/r/20211129201537.1932819-5-amir73il@gmail.com
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/dnotify.h          | 2 +-
 include/linux/fsnotify.h         | 9 ++++++---
 include/linux/fsnotify_backend.h | 7 ++++---
 3 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h
index 0aad774beaec..b87c3b85a166 100644
--- a/include/linux/dnotify.h
+++ b/include/linux/dnotify.h
@@ -26,7 +26,7 @@ struct dnotify_struct {
 			    FS_MODIFY | FS_MODIFY_CHILD |\
 			    FS_ACCESS | FS_ACCESS_CHILD |\
 			    FS_ATTRIB | FS_ATTRIB_CHILD |\
-			    FS_CREATE | FS_DN_RENAME |\
+			    FS_CREATE | FS_RENAME |\
 			    FS_MOVED_FROM | FS_MOVED_TO)
 
 extern int dir_notify_enable;
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 787545e87eeb..3a2d7dc3c607 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -144,16 +144,19 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 	u32 fs_cookie = fsnotify_get_cookie();
 	__u32 old_dir_mask = FS_MOVED_FROM;
 	__u32 new_dir_mask = FS_MOVED_TO;
+	__u32 rename_mask = FS_RENAME;
 	const struct qstr *new_name = &moved->d_name;
 
-	if (old_dir == new_dir)
-		old_dir_mask |= FS_DN_RENAME;
-
 	if (isdir) {
 		old_dir_mask |= FS_ISDIR;
 		new_dir_mask |= FS_ISDIR;
+		rename_mask |= FS_ISDIR;
 	}
 
+	/* Event with information about both old and new parent+name */
+	fsnotify_name(rename_mask, moved, FSNOTIFY_EVENT_DENTRY,
+		      old_dir, old_name, 0);
+
 	fsnotify_name(old_dir_mask, source, FSNOTIFY_EVENT_INODE,
 		      old_dir, old_name, fs_cookie);
 	fsnotify_name(new_dir_mask, source, FSNOTIFY_EVENT_INODE,
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 73739fee1710..790c31844db5 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -63,7 +63,7 @@
  */
 #define FS_EVENT_ON_CHILD	0x08000000
 
-#define FS_DN_RENAME		0x10000000	/* file renamed */
+#define FS_RENAME		0x10000000	/* File was renamed */
 #define FS_DN_MULTISHOT		0x20000000	/* dnotify multishot */
 #define FS_ISDIR		0x40000000	/* event occurred against dir */
 #define FS_IN_ONESHOT		0x80000000	/* only send event once */
@@ -76,7 +76,7 @@
  * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event
  * when a directory entry inside a child subdir changes.
  */
-#define ALL_FSNOTIFY_DIRENT_EVENTS	(FS_CREATE | FS_DELETE | FS_MOVE)
+#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME)
 
 #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \
 				  FS_OPEN_EXEC_PERM)
@@ -101,7 +101,7 @@
 /* Events that can be reported to backends */
 #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \
 			     FS_EVENTS_POSS_ON_CHILD | \
-			     FS_DELETE_SELF | FS_MOVE_SELF | FS_DN_RENAME | \
+			     FS_DELETE_SELF | FS_MOVE_SELF | \
 			     FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
 			     FS_ERROR)
 
@@ -349,6 +349,7 @@ enum fsnotify_iter_type {
 	FSNOTIFY_ITER_TYPE_VFSMOUNT,
 	FSNOTIFY_ITER_TYPE_SB,
 	FSNOTIFY_ITER_TYPE_PARENT,
+	FSNOTIFY_ITER_TYPE_INODE2,
 	FSNOTIFY_ITER_TYPE_COUNT
 };
 
-- 
cgit v1.2.3


From 3982534ba5ce45e890b2f5ef5e7372c1accd14c7 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 29 Nov 2021 22:15:34 +0200
Subject: fanotify: record old and new parent and name in FAN_RENAME event

In the special case of FAN_RENAME event, we record both the old
and new parent and name.

Link: https://lore.kernel.org/r/20211129201537.1932819-9-amir73il@gmail.com
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/uapi/linux/fanotify.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index 60f73639a896..9d0e2dc5767b 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -28,6 +28,8 @@
 
 #define FAN_EVENT_ON_CHILD	0x08000000	/* Interested in child events */
 
+#define FAN_RENAME		0x10000000	/* File was renamed */
+
 #define FAN_ONDIR		0x40000000	/* Event occurred against dir */
 
 /* helper events */
-- 
cgit v1.2.3


From 7326e382c21e9c23c89c88369afdc90b82a14da8 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 29 Nov 2021 22:15:36 +0200
Subject: fanotify: report old and/or new parent+name in FAN_RENAME event

In the special case of FAN_RENAME event, we report old or new or both
old and new parent+name.

A single info record will be reported if either the old or new dir
is watched and two records will be reported if both old and new dir
(or their filesystem) are watched.

The old and new parent+name are reported using new info record types
FAN_EVENT_INFO_TYPE_{OLD,NEW}_DFID_NAME, so if a single info record
is reported, it is clear to the application, to which dir entry the
fid+name info is referring to.

Link: https://lore.kernel.org/r/20211129201537.1932819-11-amir73il@gmail.com
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/uapi/linux/fanotify.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index 9d0e2dc5767b..e8ac38cc2fd6 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -134,6 +134,12 @@ struct fanotify_event_metadata {
 #define FAN_EVENT_INFO_TYPE_PIDFD	4
 #define FAN_EVENT_INFO_TYPE_ERROR	5
 
+/* Special info types for FAN_RENAME */
+#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME	10
+/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID	11 */
+#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME	12
+/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID	13 */
+
 /* Variable length info record following event metadata */
 struct fanotify_event_info_header {
 	__u8 info_type;
-- 
cgit v1.2.3


From 8cc3b1ccd930fe6971e1527f0c4f1bdc8cb56026 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 29 Nov 2021 22:15:37 +0200
Subject: fanotify: wire up FAN_RENAME event

FAN_RENAME is the successor of FAN_MOVED_FROM and FAN_MOVED_TO
and can be used to get the old and new parent+name information in
a single event.

FAN_MOVED_FROM and FAN_MOVED_TO are still supported for backward
compatibility, but it makes little sense to use them together with
FAN_RENAME in the same group.

FAN_RENAME uses special info type records to report the old and
new parent+name, so reporting only old and new parent id is less
useful and was not implemented.
Therefore, FAN_REANAME requires a group with flag FAN_REPORT_NAME.

Link: https://lore.kernel.org/r/20211129201537.1932819-12-amir73il@gmail.com
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fanotify.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 376e050e6f38..3afdf339d53c 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -82,7 +82,8 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
  * Directory entry modification events - reported only to directory
  * where entry is modified and not to a watching parent.
  */
-#define FANOTIFY_DIRENT_EVENTS	(FAN_MOVE | FAN_CREATE | FAN_DELETE)
+#define FANOTIFY_DIRENT_EVENTS	(FAN_MOVE | FAN_CREATE | FAN_DELETE | \
+				 FAN_RENAME)
 
 /* Events that can be reported with event->fd */
 #define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS)
-- 
cgit v1.2.3


From f1d9268e061863ead77b07f5a6807d063e28a1c2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 14 Dec 2021 07:09:33 -0800
Subject: net: add net device refcount tracker to struct packet_type

Most notable changes are in af_packet, tipc ones are trivial.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Jon Maloy <jmaloy@redhat.com>
Cc: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c06e9dc1a317..a419718612c6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2533,6 +2533,7 @@ struct packet_type {
 	__be16			type;	/* This is really htons(ether_type). */
 	bool			ignore_outgoing;
 	struct net_device	*dev;	/* NULL is wildcarded here	     */
+	netdevice_tracker	dev_tracker;
 	int			(*func) (struct sk_buff *,
 					 struct net_device *,
 					 struct packet_type *,
-- 
cgit v1.2.3


From 685b1afd7911676691c4167f420e16a957f5a38e Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Thu, 9 Dec 2021 12:09:23 +0200
Subject: net/mlx5: Introduce log_max_current_uc_list_wr_supported bit

Downstream patch will use this bit in order to know whether the device
supports changing of max_uc_list.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/mlx5_ifc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 3636df90899a..d3899fc33fd7 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1621,7 +1621,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 
 	u8         ext_stride_num_range[0x1];
 	u8         roce_rw_supported[0x1];
-	u8         reserved_at_3a2[0x1];
+	u8         log_max_current_uc_list_wr_supported[0x1];
 	u8         log_max_stride_sz_rq[0x5];
 	u8         reserved_at_3a8[0x3];
 	u8         log_min_stride_sz_rq[0x5];
-- 
cgit v1.2.3


From fb82437fdd8cd8ac41b1265e40a96668e33c3a8d Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Thu, 18 Nov 2021 16:13:00 +0200
Subject: PCI: Change capability register offsets to hex

Convert offsets of capability registers from decimal to hex.  This matches
the spec documents and is less error prone.

[bhelgaas: also convert other capabilities with offsets > 8]
Suggested-by: Bjorn Helgaas <helgaas@kernel.org>
Link: https://lore.kernel.org/r/20210825160516.GA3576414@bjorn-Precision-5520/
Link: https://lore.kernel.org/r/aa067278adacbb59a675366052714081f4980f26.1637244780.git.baruch@tkos.co.il
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/uapi/linux/pci_regs.h | 138 +++++++++++++++++++++---------------------
 1 file changed, 69 insertions(+), 69 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index ff6ccbc6efe9..fe86f5310d76 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -301,23 +301,23 @@
 #define  PCI_SID_ESR_FIC	0x20	/* First In Chassis Flag */
 #define PCI_SID_CHASSIS_NR	3	/* Chassis Number */
 
-/* Message Signalled Interrupt registers */
+/* Message Signaled Interrupt registers */
 
-#define PCI_MSI_FLAGS		2	/* Message Control */
+#define PCI_MSI_FLAGS		0x02	/* Message Control */
 #define  PCI_MSI_FLAGS_ENABLE	0x0001	/* MSI feature enabled */
 #define  PCI_MSI_FLAGS_QMASK	0x000e	/* Maximum queue size available */
 #define  PCI_MSI_FLAGS_QSIZE	0x0070	/* Message queue size configured */
 #define  PCI_MSI_FLAGS_64BIT	0x0080	/* 64-bit addresses allowed */
 #define  PCI_MSI_FLAGS_MASKBIT	0x0100	/* Per-vector masking capable */
 #define PCI_MSI_RFU		3	/* Rest of capability flags */
-#define PCI_MSI_ADDRESS_LO	4	/* Lower 32 bits */
-#define PCI_MSI_ADDRESS_HI	8	/* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
-#define PCI_MSI_DATA_32		8	/* 16 bits of data for 32-bit devices */
-#define PCI_MSI_MASK_32		12	/* Mask bits register for 32-bit devices */
-#define PCI_MSI_PENDING_32	16	/* Pending intrs for 32-bit devices */
-#define PCI_MSI_DATA_64		12	/* 16 bits of data for 64-bit devices */
-#define PCI_MSI_MASK_64		16	/* Mask bits register for 64-bit devices */
-#define PCI_MSI_PENDING_64	20	/* Pending intrs for 64-bit devices */
+#define PCI_MSI_ADDRESS_LO	0x04	/* Lower 32 bits */
+#define PCI_MSI_ADDRESS_HI	0x08	/* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
+#define PCI_MSI_DATA_32		0x08	/* 16 bits of data for 32-bit devices */
+#define PCI_MSI_MASK_32		0x0c	/* Mask bits register for 32-bit devices */
+#define PCI_MSI_PENDING_32	0x10	/* Pending intrs for 32-bit devices */
+#define PCI_MSI_DATA_64		0x0c	/* 16 bits of data for 64-bit devices */
+#define PCI_MSI_MASK_64		0x10	/* Mask bits register for 64-bit devices */
+#define PCI_MSI_PENDING_64	0x14	/* Pending intrs for 64-bit devices */
 
 /* MSI-X registers (in MSI-X capability) */
 #define PCI_MSIX_FLAGS		2	/* Message Control */
@@ -335,10 +335,10 @@
 
 /* MSI-X Table entry format (in memory mapped by a BAR) */
 #define PCI_MSIX_ENTRY_SIZE		16
-#define PCI_MSIX_ENTRY_LOWER_ADDR	0  /* Message Address */
-#define PCI_MSIX_ENTRY_UPPER_ADDR	4  /* Message Upper Address */
-#define PCI_MSIX_ENTRY_DATA		8  /* Message Data */
-#define PCI_MSIX_ENTRY_VECTOR_CTRL	12 /* Vector Control */
+#define PCI_MSIX_ENTRY_LOWER_ADDR	0x0  /* Message Address */
+#define PCI_MSIX_ENTRY_UPPER_ADDR	0x4  /* Message Upper Address */
+#define PCI_MSIX_ENTRY_DATA		0x8  /* Message Data */
+#define PCI_MSIX_ENTRY_VECTOR_CTRL	0xc  /* Vector Control */
 #define  PCI_MSIX_ENTRY_CTRL_MASKBIT	0x00000001
 
 /* CompactPCI Hotswap Register */
@@ -470,7 +470,7 @@
 
 /* PCI Express capability registers */
 
-#define PCI_EXP_FLAGS		2	/* Capabilities register */
+#define PCI_EXP_FLAGS		0x02	/* Capabilities register */
 #define  PCI_EXP_FLAGS_VERS	0x000f	/* Capability version */
 #define  PCI_EXP_FLAGS_TYPE	0x00f0	/* Device/Port type */
 #define   PCI_EXP_TYPE_ENDPOINT	   0x0	/* Express Endpoint */
@@ -484,7 +484,7 @@
 #define   PCI_EXP_TYPE_RC_EC	   0xa	/* Root Complex Event Collector */
 #define  PCI_EXP_FLAGS_SLOT	0x0100	/* Slot implemented */
 #define  PCI_EXP_FLAGS_IRQ	0x3e00	/* Interrupt message number */
-#define PCI_EXP_DEVCAP		4	/* Device capabilities */
+#define PCI_EXP_DEVCAP		0x04	/* Device capabilities */
 #define  PCI_EXP_DEVCAP_PAYLOAD	0x00000007 /* Max_Payload_Size */
 #define  PCI_EXP_DEVCAP_PHANTOM	0x00000018 /* Phantom functions */
 #define  PCI_EXP_DEVCAP_EXT_TAG	0x00000020 /* Extended tags */
@@ -497,7 +497,7 @@
 #define  PCI_EXP_DEVCAP_PWR_VAL	0x03fc0000 /* Slot Power Limit Value */
 #define  PCI_EXP_DEVCAP_PWR_SCL	0x0c000000 /* Slot Power Limit Scale */
 #define  PCI_EXP_DEVCAP_FLR     0x10000000 /* Function Level Reset */
-#define PCI_EXP_DEVCTL		8	/* Device Control */
+#define PCI_EXP_DEVCTL		0x08	/* Device Control */
 #define  PCI_EXP_DEVCTL_CERE	0x0001	/* Correctable Error Reporting En. */
 #define  PCI_EXP_DEVCTL_NFERE	0x0002	/* Non-Fatal Error Reporting Enable */
 #define  PCI_EXP_DEVCTL_FERE	0x0004	/* Fatal Error Reporting Enable */
@@ -522,7 +522,7 @@
 #define  PCI_EXP_DEVCTL_READRQ_2048B 0x4000 /* 2048 Bytes */
 #define  PCI_EXP_DEVCTL_READRQ_4096B 0x5000 /* 4096 Bytes */
 #define  PCI_EXP_DEVCTL_BCR_FLR 0x8000  /* Bridge Configuration Retry / FLR */
-#define PCI_EXP_DEVSTA		10	/* Device Status */
+#define PCI_EXP_DEVSTA		0x0a	/* Device Status */
 #define  PCI_EXP_DEVSTA_CED	0x0001	/* Correctable Error Detected */
 #define  PCI_EXP_DEVSTA_NFED	0x0002	/* Non-Fatal Error Detected */
 #define  PCI_EXP_DEVSTA_FED	0x0004	/* Fatal Error Detected */
@@ -530,7 +530,7 @@
 #define  PCI_EXP_DEVSTA_AUXPD	0x0010	/* AUX Power Detected */
 #define  PCI_EXP_DEVSTA_TRPND	0x0020	/* Transactions Pending */
 #define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1	12	/* v1 endpoints without link end here */
-#define PCI_EXP_LNKCAP		12	/* Link Capabilities */
+#define PCI_EXP_LNKCAP		0x0c	/* Link Capabilities */
 #define  PCI_EXP_LNKCAP_SLS	0x0000000f /* Supported Link Speeds */
 #define  PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */
 #define  PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector bit 1 */
@@ -549,7 +549,7 @@
 #define  PCI_EXP_LNKCAP_DLLLARC	0x00100000 /* Data Link Layer Link Active Reporting Capable */
 #define  PCI_EXP_LNKCAP_LBNC	0x00200000 /* Link Bandwidth Notification Capability */
 #define  PCI_EXP_LNKCAP_PN	0xff000000 /* Port Number */
-#define PCI_EXP_LNKCTL		16	/* Link Control */
+#define PCI_EXP_LNKCTL		0x10	/* Link Control */
 #define  PCI_EXP_LNKCTL_ASPMC	0x0003	/* ASPM Control */
 #define  PCI_EXP_LNKCTL_ASPM_L0S 0x0001	/* L0s Enable */
 #define  PCI_EXP_LNKCTL_ASPM_L1  0x0002	/* L1 Enable */
@@ -562,7 +562,7 @@
 #define  PCI_EXP_LNKCTL_HAWD	0x0200	/* Hardware Autonomous Width Disable */
 #define  PCI_EXP_LNKCTL_LBMIE	0x0400	/* Link Bandwidth Management Interrupt Enable */
 #define  PCI_EXP_LNKCTL_LABIE	0x0800	/* Link Autonomous Bandwidth Interrupt Enable */
-#define PCI_EXP_LNKSTA		18	/* Link Status */
+#define PCI_EXP_LNKSTA		0x12	/* Link Status */
 #define  PCI_EXP_LNKSTA_CLS	0x000f	/* Current Link Speed */
 #define  PCI_EXP_LNKSTA_CLS_2_5GB 0x0001 /* Current Link Speed 2.5GT/s */
 #define  PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */
@@ -582,7 +582,7 @@
 #define  PCI_EXP_LNKSTA_LBMS	0x4000	/* Link Bandwidth Management Status */
 #define  PCI_EXP_LNKSTA_LABS	0x8000	/* Link Autonomous Bandwidth Status */
 #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1	20	/* v1 endpoints with link end here */
-#define PCI_EXP_SLTCAP		20	/* Slot Capabilities */
+#define PCI_EXP_SLTCAP		0x14	/* Slot Capabilities */
 #define  PCI_EXP_SLTCAP_ABP	0x00000001 /* Attention Button Present */
 #define  PCI_EXP_SLTCAP_PCP	0x00000002 /* Power Controller Present */
 #define  PCI_EXP_SLTCAP_MRLSP	0x00000004 /* MRL Sensor Present */
@@ -595,7 +595,7 @@
 #define  PCI_EXP_SLTCAP_EIP	0x00020000 /* Electromechanical Interlock Present */
 #define  PCI_EXP_SLTCAP_NCCS	0x00040000 /* No Command Completed Support */
 #define  PCI_EXP_SLTCAP_PSN	0xfff80000 /* Physical Slot Number */
-#define PCI_EXP_SLTCTL		24	/* Slot Control */
+#define PCI_EXP_SLTCTL		0x18	/* Slot Control */
 #define  PCI_EXP_SLTCTL_ABPE	0x0001	/* Attention Button Pressed Enable */
 #define  PCI_EXP_SLTCTL_PFDE	0x0002	/* Power Fault Detected Enable */
 #define  PCI_EXP_SLTCTL_MRLSCE	0x0004	/* MRL Sensor Changed Enable */
@@ -617,7 +617,7 @@
 #define  PCI_EXP_SLTCTL_EIC	0x0800	/* Electromechanical Interlock Control */
 #define  PCI_EXP_SLTCTL_DLLSCE	0x1000	/* Data Link Layer State Changed Enable */
 #define  PCI_EXP_SLTCTL_IBPD_DISABLE	0x4000 /* In-band PD disable */
-#define PCI_EXP_SLTSTA		26	/* Slot Status */
+#define PCI_EXP_SLTSTA		0x1a	/* Slot Status */
 #define  PCI_EXP_SLTSTA_ABP	0x0001	/* Attention Button Pressed */
 #define  PCI_EXP_SLTSTA_PFD	0x0002	/* Power Fault Detected */
 #define  PCI_EXP_SLTSTA_MRLSC	0x0004	/* MRL Sensor Changed */
@@ -627,15 +627,15 @@
 #define  PCI_EXP_SLTSTA_PDS	0x0040	/* Presence Detect State */
 #define  PCI_EXP_SLTSTA_EIS	0x0080	/* Electromechanical Interlock Status */
 #define  PCI_EXP_SLTSTA_DLLSC	0x0100	/* Data Link Layer State Changed */
-#define PCI_EXP_RTCTL		28	/* Root Control */
+#define PCI_EXP_RTCTL		0x1c	/* Root Control */
 #define  PCI_EXP_RTCTL_SECEE	0x0001	/* System Error on Correctable Error */
 #define  PCI_EXP_RTCTL_SENFEE	0x0002	/* System Error on Non-Fatal Error */
 #define  PCI_EXP_RTCTL_SEFEE	0x0004	/* System Error on Fatal Error */
 #define  PCI_EXP_RTCTL_PMEIE	0x0008	/* PME Interrupt Enable */
 #define  PCI_EXP_RTCTL_CRSSVE	0x0010	/* CRS Software Visibility Enable */
-#define PCI_EXP_RTCAP		30	/* Root Capabilities */
+#define PCI_EXP_RTCAP		0x1e	/* Root Capabilities */
 #define  PCI_EXP_RTCAP_CRSVIS	0x0001	/* CRS Software Visibility capability */
-#define PCI_EXP_RTSTA		32	/* Root Status */
+#define PCI_EXP_RTSTA		0x20	/* Root Status */
 #define  PCI_EXP_RTSTA_PME	0x00010000 /* PME status */
 #define  PCI_EXP_RTSTA_PENDING	0x00020000 /* PME pending */
 /*
@@ -646,7 +646,7 @@
  * Use pcie_capability_read_word() and similar interfaces to use them
  * safely.
  */
-#define PCI_EXP_DEVCAP2		36	/* Device Capabilities 2 */
+#define PCI_EXP_DEVCAP2		0x24	/* Device Capabilities 2 */
 #define  PCI_EXP_DEVCAP2_COMP_TMOUT_DIS	0x00000010 /* Completion Timeout Disable supported */
 #define  PCI_EXP_DEVCAP2_ARI		0x00000020 /* Alternative Routing-ID */
 #define  PCI_EXP_DEVCAP2_ATOMIC_ROUTE	0x00000040 /* Atomic Op routing */
@@ -658,7 +658,7 @@
 #define  PCI_EXP_DEVCAP2_OBFF_MSG	0x00040000 /* New message signaling */
 #define  PCI_EXP_DEVCAP2_OBFF_WAKE	0x00080000 /* Re-use WAKE# for OBFF */
 #define  PCI_EXP_DEVCAP2_EE_PREFIX	0x00200000 /* End-End TLP Prefix */
-#define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
+#define PCI_EXP_DEVCTL2		0x28	/* Device Control 2 */
 #define  PCI_EXP_DEVCTL2_COMP_TIMEOUT	0x000f	/* Completion Timeout Value */
 #define  PCI_EXP_DEVCTL2_COMP_TMOUT_DIS	0x0010	/* Completion Timeout Disable */
 #define  PCI_EXP_DEVCTL2_ARI		0x0020	/* Alternative Routing-ID */
@@ -670,9 +670,9 @@
 #define  PCI_EXP_DEVCTL2_OBFF_MSGA_EN	0x2000	/* Enable OBFF Message type A */
 #define  PCI_EXP_DEVCTL2_OBFF_MSGB_EN	0x4000	/* Enable OBFF Message type B */
 #define  PCI_EXP_DEVCTL2_OBFF_WAKE_EN	0x6000	/* OBFF using WAKE# signaling */
-#define PCI_EXP_DEVSTA2		42	/* Device Status 2 */
-#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2	44	/* v2 endpoints without link end here */
-#define PCI_EXP_LNKCAP2		44	/* Link Capabilities 2 */
+#define PCI_EXP_DEVSTA2		0x2a	/* Device Status 2 */
+#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 0x2c	/* end of v2 EPs w/o link */
+#define PCI_EXP_LNKCAP2		0x2c	/* Link Capabilities 2 */
 #define  PCI_EXP_LNKCAP2_SLS_2_5GB	0x00000002 /* Supported Speed 2.5GT/s */
 #define  PCI_EXP_LNKCAP2_SLS_5_0GB	0x00000004 /* Supported Speed 5GT/s */
 #define  PCI_EXP_LNKCAP2_SLS_8_0GB	0x00000008 /* Supported Speed 8GT/s */
@@ -680,7 +680,7 @@
 #define  PCI_EXP_LNKCAP2_SLS_32_0GB	0x00000020 /* Supported Speed 32GT/s */
 #define  PCI_EXP_LNKCAP2_SLS_64_0GB	0x00000040 /* Supported Speed 64GT/s */
 #define  PCI_EXP_LNKCAP2_CROSSLINK	0x00000100 /* Crosslink supported */
-#define PCI_EXP_LNKCTL2		48	/* Link Control 2 */
+#define PCI_EXP_LNKCTL2		0x30	/* Link Control 2 */
 #define  PCI_EXP_LNKCTL2_TLS		0x000f
 #define  PCI_EXP_LNKCTL2_TLS_2_5GT	0x0001 /* Supported Speed 2.5GT/s */
 #define  PCI_EXP_LNKCTL2_TLS_5_0GT	0x0002 /* Supported Speed 5GT/s */
@@ -691,12 +691,12 @@
 #define  PCI_EXP_LNKCTL2_ENTER_COMP	0x0010 /* Enter Compliance */
 #define  PCI_EXP_LNKCTL2_TX_MARGIN	0x0380 /* Transmit Margin */
 #define  PCI_EXP_LNKCTL2_HASD		0x0020 /* HW Autonomous Speed Disable */
-#define PCI_EXP_LNKSTA2		50	/* Link Status 2 */
-#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2	52	/* v2 endpoints with link end here */
-#define PCI_EXP_SLTCAP2		52	/* Slot Capabilities 2 */
+#define PCI_EXP_LNKSTA2		0x32	/* Link Status 2 */
+#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2	0x32	/* end of v2 EPs w/ link */
+#define PCI_EXP_SLTCAP2		0x34	/* Slot Capabilities 2 */
 #define  PCI_EXP_SLTCAP2_IBPD	0x00000001 /* In-band PD Disable Supported */
-#define PCI_EXP_SLTCTL2		56	/* Slot Control 2 */
-#define PCI_EXP_SLTSTA2		58	/* Slot Status 2 */
+#define PCI_EXP_SLTCTL2		0x38	/* Slot Control 2 */
+#define PCI_EXP_SLTSTA2		0x3a	/* Slot Status 2 */
 
 /* Extended Capabilities (PCI-X 2.0 and Express) */
 #define PCI_EXT_CAP_ID(header)		(header & 0x0000ffff)
@@ -742,7 +742,7 @@
 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
 
 /* Advanced Error Reporting */
-#define PCI_ERR_UNCOR_STATUS	4	/* Uncorrectable Error Status */
+#define PCI_ERR_UNCOR_STATUS	0x04	/* Uncorrectable Error Status */
 #define  PCI_ERR_UNC_UND	0x00000001	/* Undefined */
 #define  PCI_ERR_UNC_DLP	0x00000010	/* Data Link Protocol */
 #define  PCI_ERR_UNC_SURPDN	0x00000020	/* Surprise Down */
@@ -760,11 +760,11 @@
 #define  PCI_ERR_UNC_MCBTLP	0x00800000	/* MC blocked TLP */
 #define  PCI_ERR_UNC_ATOMEG	0x01000000	/* Atomic egress blocked */
 #define  PCI_ERR_UNC_TLPPRE	0x02000000	/* TLP prefix blocked */
-#define PCI_ERR_UNCOR_MASK	8	/* Uncorrectable Error Mask */
+#define PCI_ERR_UNCOR_MASK	0x08	/* Uncorrectable Error Mask */
 	/* Same bits as above */
-#define PCI_ERR_UNCOR_SEVER	12	/* Uncorrectable Error Severity */
+#define PCI_ERR_UNCOR_SEVER	0x0c	/* Uncorrectable Error Severity */
 	/* Same bits as above */
-#define PCI_ERR_COR_STATUS	16	/* Correctable Error Status */
+#define PCI_ERR_COR_STATUS	0x10	/* Correctable Error Status */
 #define  PCI_ERR_COR_RCVR	0x00000001	/* Receiver Error Status */
 #define  PCI_ERR_COR_BAD_TLP	0x00000040	/* Bad TLP Status */
 #define  PCI_ERR_COR_BAD_DLLP	0x00000080	/* Bad DLLP Status */
@@ -773,20 +773,20 @@
 #define  PCI_ERR_COR_ADV_NFAT	0x00002000	/* Advisory Non-Fatal */
 #define  PCI_ERR_COR_INTERNAL	0x00004000	/* Corrected Internal */
 #define  PCI_ERR_COR_LOG_OVER	0x00008000	/* Header Log Overflow */
-#define PCI_ERR_COR_MASK	20	/* Correctable Error Mask */
+#define PCI_ERR_COR_MASK	0x14	/* Correctable Error Mask */
 	/* Same bits as above */
-#define PCI_ERR_CAP		24	/* Advanced Error Capabilities */
-#define  PCI_ERR_CAP_FEP(x)	((x) & 31)	/* First Error Pointer */
+#define PCI_ERR_CAP		0x18	/* Advanced Error Capabilities & Ctrl*/
+#define  PCI_ERR_CAP_FEP(x)	((x) & 0x1f)	/* First Error Pointer */
 #define  PCI_ERR_CAP_ECRC_GENC	0x00000020	/* ECRC Generation Capable */
 #define  PCI_ERR_CAP_ECRC_GENE	0x00000040	/* ECRC Generation Enable */
 #define  PCI_ERR_CAP_ECRC_CHKC	0x00000080	/* ECRC Check Capable */
 #define  PCI_ERR_CAP_ECRC_CHKE	0x00000100	/* ECRC Check Enable */
-#define PCI_ERR_HEADER_LOG	28	/* Header Log Register (16 bytes) */
-#define PCI_ERR_ROOT_COMMAND	44	/* Root Error Command */
+#define PCI_ERR_HEADER_LOG	0x1c	/* Header Log Register (16 bytes) */
+#define PCI_ERR_ROOT_COMMAND	0x2c	/* Root Error Command */
 #define  PCI_ERR_ROOT_CMD_COR_EN	0x00000001 /* Correctable Err Reporting Enable */
 #define  PCI_ERR_ROOT_CMD_NONFATAL_EN	0x00000002 /* Non-Fatal Err Reporting Enable */
 #define  PCI_ERR_ROOT_CMD_FATAL_EN	0x00000004 /* Fatal Err Reporting Enable */
-#define PCI_ERR_ROOT_STATUS	48
+#define PCI_ERR_ROOT_STATUS	0x30
 #define  PCI_ERR_ROOT_COR_RCV		0x00000001 /* ERR_COR Received */
 #define  PCI_ERR_ROOT_MULTI_COR_RCV	0x00000002 /* Multiple ERR_COR */
 #define  PCI_ERR_ROOT_UNCOR_RCV		0x00000004 /* ERR_FATAL/NONFATAL */
@@ -795,52 +795,52 @@
 #define  PCI_ERR_ROOT_NONFATAL_RCV	0x00000020 /* Non-Fatal Received */
 #define  PCI_ERR_ROOT_FATAL_RCV		0x00000040 /* Fatal Received */
 #define  PCI_ERR_ROOT_AER_IRQ		0xf8000000 /* Advanced Error Interrupt Message Number */
-#define PCI_ERR_ROOT_ERR_SRC	52	/* Error Source Identification */
+#define PCI_ERR_ROOT_ERR_SRC	0x34	/* Error Source Identification */
 
 /* Virtual Channel */
-#define PCI_VC_PORT_CAP1	4
+#define PCI_VC_PORT_CAP1	0x04
 #define  PCI_VC_CAP1_EVCC	0x00000007	/* extended VC count */
 #define  PCI_VC_CAP1_LPEVCC	0x00000070	/* low prio extended VC count */
 #define  PCI_VC_CAP1_ARB_SIZE	0x00000c00
-#define PCI_VC_PORT_CAP2	8
+#define PCI_VC_PORT_CAP2	0x08
 #define  PCI_VC_CAP2_32_PHASE		0x00000002
 #define  PCI_VC_CAP2_64_PHASE		0x00000004
 #define  PCI_VC_CAP2_128_PHASE		0x00000008
 #define  PCI_VC_CAP2_ARB_OFF		0xff000000
-#define PCI_VC_PORT_CTRL	12
+#define PCI_VC_PORT_CTRL	0x0c
 #define  PCI_VC_PORT_CTRL_LOAD_TABLE	0x00000001
-#define PCI_VC_PORT_STATUS	14
+#define PCI_VC_PORT_STATUS	0x0e
 #define  PCI_VC_PORT_STATUS_TABLE	0x00000001
-#define PCI_VC_RES_CAP		16
+#define PCI_VC_RES_CAP		0x10
 #define  PCI_VC_RES_CAP_32_PHASE	0x00000002
 #define  PCI_VC_RES_CAP_64_PHASE	0x00000004
 #define  PCI_VC_RES_CAP_128_PHASE	0x00000008
 #define  PCI_VC_RES_CAP_128_PHASE_TB	0x00000010
 #define  PCI_VC_RES_CAP_256_PHASE	0x00000020
 #define  PCI_VC_RES_CAP_ARB_OFF		0xff000000
-#define PCI_VC_RES_CTRL		20
+#define PCI_VC_RES_CTRL		0x14
 #define  PCI_VC_RES_CTRL_LOAD_TABLE	0x00010000
 #define  PCI_VC_RES_CTRL_ARB_SELECT	0x000e0000
 #define  PCI_VC_RES_CTRL_ID		0x07000000
 #define  PCI_VC_RES_CTRL_ENABLE		0x80000000
-#define PCI_VC_RES_STATUS	26
+#define PCI_VC_RES_STATUS	0x1a
 #define  PCI_VC_RES_STATUS_TABLE	0x00000001
 #define  PCI_VC_RES_STATUS_NEGO		0x00000002
 #define PCI_CAP_VC_BASE_SIZEOF		0x10
-#define PCI_CAP_VC_PER_VC_SIZEOF	0x0C
+#define PCI_CAP_VC_PER_VC_SIZEOF	0x0c
 
 /* Power Budgeting */
-#define PCI_PWR_DSR		4	/* Data Select Register */
-#define PCI_PWR_DATA		8	/* Data Register */
+#define PCI_PWR_DSR		0x04	/* Data Select Register */
+#define PCI_PWR_DATA		0x08	/* Data Register */
 #define  PCI_PWR_DATA_BASE(x)	((x) & 0xff)	    /* Base Power */
 #define  PCI_PWR_DATA_SCALE(x)	(((x) >> 8) & 3)    /* Data Scale */
 #define  PCI_PWR_DATA_PM_SUB(x)	(((x) >> 10) & 7)   /* PM Sub State */
 #define  PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */
 #define  PCI_PWR_DATA_TYPE(x)	(((x) >> 15) & 7)   /* Type */
 #define  PCI_PWR_DATA_RAIL(x)	(((x) >> 18) & 7)   /* Power Rail */
-#define PCI_PWR_CAP		12	/* Capability */
+#define PCI_PWR_CAP		0x0c	/* Capability */
 #define  PCI_PWR_CAP_BUDGET(x)	((x) & 1)	/* Included in system budget */
-#define PCI_EXT_CAP_PWR_SIZEOF	16
+#define PCI_EXT_CAP_PWR_SIZEOF	0x10
 
 /* Root Complex Event Collector Endpoint Association  */
 #define PCI_RCEC_RCIEP_BITMAP	4	/* Associated Bitmap for RCiEPs */
@@ -964,7 +964,7 @@
 #define  PCI_SRIOV_VFM_MI	0x1	/* Dormant.MigrateIn */
 #define  PCI_SRIOV_VFM_MO	0x2	/* Active.MigrateOut */
 #define  PCI_SRIOV_VFM_AV	0x3	/* Active.Available */
-#define PCI_EXT_CAP_SRIOV_SIZEOF 64
+#define PCI_EXT_CAP_SRIOV_SIZEOF 0x40
 
 #define PCI_LTR_MAX_SNOOP_LAT	0x4
 #define PCI_LTR_MAX_NOSNOOP_LAT	0x6
@@ -1017,12 +1017,12 @@
 #define   PCI_TPH_LOC_NONE	0x000	/* no location */
 #define   PCI_TPH_LOC_CAP	0x200	/* in capability */
 #define   PCI_TPH_LOC_MSIX	0x400	/* in MSI-X */
-#define PCI_TPH_CAP_ST_MASK	0x07FF0000	/* st table mask */
-#define PCI_TPH_CAP_ST_SHIFT	16	/* st table shift */
-#define PCI_TPH_BASE_SIZEOF	12	/* size with no st table */
+#define PCI_TPH_CAP_ST_MASK	0x07FF0000	/* ST table mask */
+#define PCI_TPH_CAP_ST_SHIFT	16	/* ST table shift */
+#define PCI_TPH_BASE_SIZEOF	0xc	/* size with no ST table */
 
 /* Downstream Port Containment */
-#define PCI_EXP_DPC_CAP			4	/* DPC Capability */
+#define PCI_EXP_DPC_CAP			0x04	/* DPC Capability */
 #define PCI_EXP_DPC_IRQ			0x001F	/* Interrupt Message Number */
 #define  PCI_EXP_DPC_CAP_RP_EXT		0x0020	/* Root Port Extensions */
 #define  PCI_EXP_DPC_CAP_POISONED_TLP	0x0040	/* Poisoned TLP Egress Blocking Supported */
@@ -1030,19 +1030,19 @@
 #define  PCI_EXP_DPC_RP_PIO_LOG_SIZE	0x0F00	/* RP PIO Log Size */
 #define  PCI_EXP_DPC_CAP_DL_ACTIVE	0x1000	/* ERR_COR signal on DL_Active supported */
 
-#define PCI_EXP_DPC_CTL			6	/* DPC control */
+#define PCI_EXP_DPC_CTL			0x06	/* DPC control */
 #define  PCI_EXP_DPC_CTL_EN_FATAL	0x0001	/* Enable trigger on ERR_FATAL message */
 #define  PCI_EXP_DPC_CTL_EN_NONFATAL	0x0002	/* Enable trigger on ERR_NONFATAL message */
 #define  PCI_EXP_DPC_CTL_INT_EN		0x0008	/* DPC Interrupt Enable */
 
-#define PCI_EXP_DPC_STATUS		8	/* DPC Status */
+#define PCI_EXP_DPC_STATUS		0x08	/* DPC Status */
 #define  PCI_EXP_DPC_STATUS_TRIGGER	    0x0001 /* Trigger Status */
 #define  PCI_EXP_DPC_STATUS_TRIGGER_RSN	    0x0006 /* Trigger Reason */
 #define  PCI_EXP_DPC_STATUS_INTERRUPT	    0x0008 /* Interrupt Status */
 #define  PCI_EXP_DPC_RP_BUSY		    0x0010 /* Root Port Busy */
 #define  PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT 0x0060 /* Trig Reason Extension */
 
-#define PCI_EXP_DPC_SOURCE_ID		10	/* DPC Source Identifier */
+#define PCI_EXP_DPC_SOURCE_ID		 0x0A	/* DPC Source Identifier */
 
 #define PCI_EXP_DPC_RP_PIO_STATUS	 0x0C	/* RP PIO Status */
 #define PCI_EXP_DPC_RP_PIO_MASK		 0x10	/* RP PIO Mask */
-- 
cgit v1.2.3


From ff5f87cb6a75dbf6d30668d2464e46249dd5c47f Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 3 Dec 2021 11:28:49 +0100
Subject: clk: Introduce clk-tps68470 driver

The TPS68470 PMIC provides Clocks, GPIOs and Regulators. At present in
the kernel the Regulators and Clocks are controlled by an OpRegion
driver designed to work with power control methods defined in ACPI, but
some platforms lack those methods, meaning drivers need to be able to
consume the resources of these chips through the usual frameworks.

This commit adds a driver for the clocks provided by the tps68470,
and is designed to bind to the platform_device registered by the
intel_skl_int3472 module.

This is based on this out of tree driver written by Intel:
https://github.com/intel/linux-intel-lts/blob/4.14/base/drivers/clk/clk-tps68470.c
with various cleanups added.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20211203102857.44539-7-hdegoede@redhat.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/linux/mfd/tps68470.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/mfd/tps68470.h b/include/linux/mfd/tps68470.h
index ffe81127d91c..7807fa329db0 100644
--- a/include/linux/mfd/tps68470.h
+++ b/include/linux/mfd/tps68470.h
@@ -75,6 +75,17 @@
 #define TPS68470_CLKCFG1_MODE_A_MASK	GENMASK(1, 0)
 #define TPS68470_CLKCFG1_MODE_B_MASK	GENMASK(3, 2)
 
+#define TPS68470_CLKCFG2_DRV_STR_2MA	0x05
+#define TPS68470_PLL_OUTPUT_ENABLE	0x02
+#define TPS68470_CLK_SRC_XTAL		BIT(0)
+#define TPS68470_PLLSWR_DEFAULT		GENMASK(1, 0)
+#define TPS68470_OSC_EXT_CAP_DEFAULT	0x05
+
+#define TPS68470_OUTPUT_A_SHIFT		0x00
+#define TPS68470_OUTPUT_B_SHIFT		0x02
+#define TPS68470_CLK_SRC_SHIFT		GENMASK(2, 0)
+#define TPS68470_OSC_EXT_CAP_SHIFT	BIT(2)
+
 #define TPS68470_GPIO_CTL_REG_A(x)	(TPS68470_REG_GPCTL0A + (x) * 2)
 #define TPS68470_GPIO_CTL_REG_B(x)	(TPS68470_REG_GPCTL0B + (x) * 2)
 #define TPS68470_GPIO_MODE_MASK		GENMASK(1, 0)
-- 
cgit v1.2.3


From fe415186b43df0db1f17fa3a46275fd92107fe71 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 16 Dec 2021 08:24:08 +0100
Subject: xen/console: harden hvc_xen against event channel storms

The Xen console driver is still vulnerable for an attack via excessive
number of events sent by the backend. Fix that by using a lateeoi event
channel.

For the normal domU initial console this requires the introduction of
bind_evtchn_to_irq_lateeoi() as there is no xenbus device available
at the time the event channel is bound to the irq.

As the decision whether an interrupt was spurious or not requires to
test for bytes having been read from the backend, move sending the
event into the if statement, as sending an event without having found
any bytes to be read is making no sense at all.

This is part of XSA-391

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
V2:
- slightly adapt spurious irq detection (Jan Beulich)
V3:
- fix spurious irq detection (Jan Beulich)
---
 include/xen/events.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/xen/events.h b/include/xen/events.h
index c204262d9fc2..344081e71584 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -17,6 +17,7 @@ struct xenbus_device;
 unsigned xen_evtchn_nr_channels(void);
 
 int bind_evtchn_to_irq(evtchn_port_t evtchn);
+int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn);
 int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
 			      irq_handler_t handler,
 			      unsigned long irqflags, const char *devname,
-- 
cgit v1.2.3


From dfd0743f1d9ea76931510ed150334d571fbab49d Mon Sep 17 00:00:00 2001
From: Jens Wiklander <jens.wiklander@linaro.org>
Date: Thu, 9 Dec 2021 15:59:37 +0100
Subject: tee: handle lookup of shm with reference count 0

Since the tee subsystem does not keep a strong reference to its idle
shared memory buffers, it races with other threads that try to destroy a
shared memory through a close of its dma-buf fd or by unmapping the
memory.

In tee_shm_get_from_id() when a lookup in teedev->idr has been
successful, it is possible that the tee_shm is in the dma-buf teardown
path, but that path is blocked by the teedev mutex. Since we don't have
an API to tell if the tee_shm is in the dma-buf teardown path or not we
must find another way of detecting this condition.

Fix this by doing the reference counting directly on the tee_shm using a
new refcount_t refcount field. dma-buf is replaced by using
anon_inode_getfd() instead, this separates the life-cycle of the
underlying file from the tee_shm. tee_shm_put() is updated to hold the
mutex when decreasing the refcount to 0 and then remove the tee_shm from
teedev->idr before releasing the mutex. This means that the tee_shm can
never be found unless it has a refcount larger than 0.

Fixes: 967c9cca2cc5 ("tee: generic TEE subsystem")
Cc: stable@vger.kernel.org
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Lars Persson <larper@axis.com>
Reviewed-by: Sumit Garg <sumit.garg@linaro.org>
Reported-by: Patrik Lantz <patrik.lantz@axis.com>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 include/linux/tee_drv.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
index a1f03461369b..cf5999626e28 100644
--- a/include/linux/tee_drv.h
+++ b/include/linux/tee_drv.h
@@ -195,7 +195,7 @@ int tee_session_calc_client_uuid(uuid_t *uuid, u32 connection_method,
  * @offset:	offset of buffer in user space
  * @pages:	locked pages from userspace
  * @num_pages:	number of locked pages
- * @dmabuf:	dmabuf used to for exporting to user space
+ * @refcount:	reference counter
  * @flags:	defined by TEE_SHM_* in tee_drv.h
  * @id:		unique id of a shared memory object on this device, shared
  *		with user space
@@ -214,7 +214,7 @@ struct tee_shm {
 	unsigned int offset;
 	struct page **pages;
 	size_t num_pages;
-	struct dma_buf *dmabuf;
+	refcount_t refcount;
 	u32 flags;
 	int id;
 	u64 sec_world_id;
-- 
cgit v1.2.3


From 877691b987a089938d67de13d886932ef2f21b22 Mon Sep 17 00:00:00 2001
From: Hector Martin <marcan@marcan.st>
Date: Sun, 12 Dec 2021 15:24:06 +0900
Subject: drm/format-helper: Add drm_fb_xrgb8888_to_xrgb2101010_toio()

Add XRGB8888 emulation support for devices that can only do XRGB2101010.

This is chiefly useful for simpledrm on Apple devices where the
bootloader-provided framebuffer is 10-bit.

Signed-off-by: Hector Martin <marcan@marcan.st>
Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20211212062407.138309-3-marcan@marcan.st
---
 include/drm/drm_format_helper.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
index 97e4c3223af3..b30ed5de0a33 100644
--- a/include/drm/drm_format_helper.h
+++ b/include/drm/drm_format_helper.h
@@ -33,6 +33,9 @@ void drm_fb_xrgb8888_to_rgb888(void *dst, unsigned int dst_pitch, const void *sr
 void drm_fb_xrgb8888_to_rgb888_toio(void __iomem *dst, unsigned int dst_pitch,
 				    const void *vaddr, const struct drm_framebuffer *fb,
 				    const struct drm_rect *clip);
+void drm_fb_xrgb8888_to_xrgb2101010_toio(void __iomem *dst, unsigned int dst_pitch,
+					 const void *vaddr, const struct drm_framebuffer *fb,
+					 const struct drm_rect *clip);
 void drm_fb_xrgb8888_to_gray8(void *dst, unsigned int dst_pitch, const void *vaddr,
 			      const struct drm_framebuffer *fb, const struct drm_rect *clip);
 
-- 
cgit v1.2.3


From 006ea1b5822f9019bd722ffc6242bc0880879e3d Mon Sep 17 00:00:00 2001
From: Dave Stevenson <dave.stevenson@raspberrypi.com>
Date: Wed, 15 Dec 2021 10:17:37 +0100
Subject: drm/fourcc: Add packed 10bit YUV 4:2:0 format

Adds a format that is 3 10bit YUV 4:2:0 samples packed into
a 32bit word (with 2 spare bits).

Supported on Broadcom BCM2711 chips.

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://lore.kernel.org/r/20211215091739.135042-2-maxime@cerno.tech
---
 include/uapi/drm/drm_fourcc.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 7f652c96845b..fc0c1454d275 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -314,6 +314,13 @@ extern "C" {
  */
 #define DRM_FORMAT_P016		fourcc_code('P', '0', '1', '6') /* 2x2 subsampled Cr:Cb plane 16 bits per channel */
 
+/* 2 plane YCbCr420.
+ * 3 10 bit components and 2 padding bits packed into 4 bytes.
+ * index 0 = Y plane, [31:0] x:Y2:Y1:Y0 2:10:10:10 little endian
+ * index 1 = Cr:Cb plane, [63:0] x:Cr2:Cb2:Cr1:x:Cb1:Cr0:Cb0 [2:10:10:10:2:10:10:10] little endian
+ */
+#define DRM_FORMAT_P030		fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */
+
 /* 3 plane non-subsampled (444) YCbCr
  * 16 bits per component, but only 10 bits are used and 6 bits are padded
  * index 0: Y plane, [15:0] Y:x [10:6] little endian
@@ -854,6 +861,10 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
  * and UV.  Some SAND-using hardware stores UV in a separate tiled
  * image from Y to reduce the column height, which is not supported
  * with these modifiers.
+ *
+ * The DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT modifier is also
+ * supported for DRM_FORMAT_P030 where the columns remain as 128 bytes
+ * wide, but as this is a 10 bpp format that translates to 96 pixels.
  */
 
 #define DRM_FORMAT_MOD_BROADCOM_SAND32_COL_HEIGHT(v) \
-- 
cgit v1.2.3


From cb1c4aba055f928ffae0c868e8dfe08eeab302e7 Mon Sep 17 00:00:00 2001
From: Kajol Jain <kjain@linux.ibm.com>
Date: Mon, 6 Dec 2021 14:47:46 +0530
Subject: perf: Add new macros for mem_hops field

Add new macros for mem_hops field which can be used to
represent remote-node, socket and board level details.

Currently the code had macro for HOPS_0, which corresponds
to data coming from another core but same node.
Add new macros for HOPS_1 to HOPS_3 to represent
remote-node, socket and board level data.

For ex: Encodings for mem_hops fields with L2 cache:

L2			- local L2
L2 | REMOTE | HOPS_0	- remote core, same node L2
L2 | REMOTE | HOPS_1	- remote node, same socket L2
L2 | REMOTE | HOPS_2	- remote socket, same board L2
L2 | REMOTE | HOPS_3	- remote board L2

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20211206091749.87585-2-kjain@linux.ibm.com
---
 include/uapi/linux/perf_event.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index bd8860eeb291..1b65042ab1db 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1332,7 +1332,10 @@ union perf_mem_data_src {
 
 /* hop level */
 #define PERF_MEM_HOPS_0		0x01 /* remote core, same node */
-/* 2-7 available */
+#define PERF_MEM_HOPS_1		0x02 /* remote node, same socket */
+#define PERF_MEM_HOPS_2		0x03 /* remote socket, same board */
+#define PERF_MEM_HOPS_3		0x04 /* remote board */
+/* 5-7 available */
 #define PERF_MEM_HOPS_SHIFT	43
 
 #define PERF_MEM_S(a, s) \
-- 
cgit v1.2.3


From d1e86325af377129adb7fc6f34eb044ca6068b47 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Wed, 15 Dec 2021 15:34:15 +0000
Subject: net: phylink: add mac_select_pcs() method to phylink_mac_ops

mac_select_pcs() allows us to have an explicit point to query which
PCS the MAC wishes to use for a particular PHY interface mode, thereby
allowing us to add support to validate the link settings with the PCS.

Phylink will also use this to select the PCS to be used during a major
configuration event without the MAC driver needing to call
phylink_set_pcs().

Note that if mac_select_pcs() is present, the supported_interfaces
bitmap must be filled in; this avoids mac_select_pcs() being called
with PHY_INTERFACE_MODE_NA when we want to get support for all
interface types. Phylink will return an error in phylink_create()
unless this condition is satisfied.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phylink.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index a2f266cc3442..b3086dcafeaf 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -112,6 +112,7 @@ struct phylink_config {
 /**
  * struct phylink_mac_ops - MAC operations structure.
  * @validate: Validate and update the link configuration.
+ * @mac_select_pcs: Select a PCS for the interface mode.
  * @mac_pcs_get_state: Read the current link state from the hardware.
  * @mac_prepare: prepare for a major reconfiguration of the interface.
  * @mac_config: configure the MAC for the selected mode and state.
@@ -126,6 +127,8 @@ struct phylink_mac_ops {
 	void (*validate)(struct phylink_config *config,
 			 unsigned long *supported,
 			 struct phylink_link_state *state);
+	struct phylink_pcs *(*mac_select_pcs)(struct phylink_config *config,
+					      phy_interface_t interface);
 	void (*mac_pcs_get_state)(struct phylink_config *config,
 				  struct phylink_link_state *state);
 	int (*mac_prepare)(struct phylink_config *config, unsigned int mode,
@@ -178,6 +181,21 @@ struct phylink_mac_ops {
  */
 void validate(struct phylink_config *config, unsigned long *supported,
 	      struct phylink_link_state *state);
+/**
+ * mac_select_pcs: Select a PCS for the interface mode.
+ * @config: a pointer to a &struct phylink_config.
+ * @interface: PHY interface mode for PCS
+ *
+ * Return the &struct phylink_pcs for the specified interface mode, or
+ * NULL if none is required, or an error pointer on error.
+ *
+ * This must not modify any state. It is used to query which PCS should
+ * be used. Phylink will use this during validation to ensure that the
+ * configuration is valid, and when setting a configuration to internally
+ * set the PCS that will be used.
+ */
+struct phylink_pcs *mac_select_pcs(struct phylink_config *config,
+				   phy_interface_t interface);
 
 /**
  * mac_pcs_get_state() - Read the current inband link state from the hardware
-- 
cgit v1.2.3


From 0d22d4b626a4eaa3196019092eb6c1919e9f8caa Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Wed, 15 Dec 2021 15:34:20 +0000
Subject: net: phylink: add pcs_validate() method

Add a hook for PCS to validate the link parameters. This avoids MAC
drivers having to have knowledge of their PCS in their validate()
method, thereby allowing several MAC drivers to be simplfied.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phylink.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index b3086dcafeaf..713a0c928b7c 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -416,6 +416,7 @@ struct phylink_pcs {
 
 /**
  * struct phylink_pcs_ops - MAC PCS operations structure.
+ * @pcs_validate: validate the link configuration.
  * @pcs_get_state: read the current MAC PCS link state from the hardware.
  * @pcs_config: configure the MAC PCS for the selected mode and state.
  * @pcs_an_restart: restart 802.3z BaseX autonegotiation.
@@ -423,6 +424,8 @@ struct phylink_pcs {
  *               (where necessary).
  */
 struct phylink_pcs_ops {
+	int (*pcs_validate)(struct phylink_pcs *pcs, unsigned long *supported,
+			    const struct phylink_link_state *state);
 	void (*pcs_get_state)(struct phylink_pcs *pcs,
 			      struct phylink_link_state *state);
 	int (*pcs_config)(struct phylink_pcs *pcs, unsigned int mode,
@@ -435,6 +438,23 @@ struct phylink_pcs_ops {
 };
 
 #if 0 /* For kernel-doc purposes only. */
+/**
+ * pcs_validate() - validate the link configuration.
+ * @pcs: a pointer to a &struct phylink_pcs.
+ * @supported: ethtool bitmask for supported link modes.
+ * @state: a const pointer to a &struct phylink_link_state.
+ *
+ * Validate the interface mode, and advertising's autoneg bit, removing any
+ * media ethtool link modes that would not be supportable from the supported
+ * mask. Phylink will propagate the changes to the advertising mask. See the
+ * &struct phylink_mac_ops validate() method.
+ *
+ * Returns -EINVAL if the interface mode/autoneg mode is not supported.
+ * Returns non-zero positive if the link state can be supported.
+ */
+int pcs_validate(struct phylink_pcs *pcs, unsigned long *supported,
+		 const struct phylink_link_state *state);
+
 /**
  * pcs_get_state() - Read the current inband link state from the hardware
  * @pcs: a pointer to a &struct phylink_pcs.
-- 
cgit v1.2.3


From 9131c6331726514f0d0364c41f8733cc5eec11ab Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Wed, 1 Dec 2021 02:23:09 +0300
Subject: soc/tegra: Add devm_tegra_core_dev_init_opp_table_common()

Only couple drivers need to get the -ENODEV error code and majority of
drivers need to explicitly initialize the performance state. Add new
common helper which sets up OPP table for these drivers.

Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 include/soc/tegra/common.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/soc/tegra/common.h b/include/soc/tegra/common.h
index af41ad80ec21..8ec1ac07fc85 100644
--- a/include/soc/tegra/common.h
+++ b/include/soc/tegra/common.h
@@ -39,4 +39,19 @@ devm_tegra_core_dev_init_opp_table(struct device *dev,
 }
 #endif
 
+static inline int
+devm_tegra_core_dev_init_opp_table_common(struct device *dev)
+{
+	struct tegra_core_opp_params opp_params = {};
+	int err;
+
+	opp_params.init_state = true;
+
+	err = devm_tegra_core_dev_init_opp_table(dev, &opp_params);
+	if (err != -ENODEV)
+		return err;
+
+	return 0;
+}
+
 #endif /* __SOC_TEGRA_COMMON_H__ */
-- 
cgit v1.2.3


From c6aeaf56f468a565f6d2f27325fc07d35cdcd3cb Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 9 Sep 2021 15:51:24 +0200
Subject: drm/tegra: Implement correct DMA-BUF semantics

DMA-BUF requires that each device that accesses a DMA-BUF attaches to it
separately. To do so the host1x_bo_pin() and host1x_bo_unpin() functions
need to be reimplemented so that they can return a mapping, which either
represents an attachment or a map of the driver's own GEM object.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 include/linux/host1x.h | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 7bccf589aba7..157326074df8 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -7,6 +7,7 @@
 #define __LINUX_HOST1X_H
 
 #include <linux/device.h>
+#include <linux/dma-direction.h>
 #include <linux/types.h>
 
 enum host1x_class {
@@ -82,12 +83,23 @@ struct host1x_client {
 struct host1x_bo;
 struct sg_table;
 
+struct host1x_bo_mapping {
+	struct dma_buf_attachment *attach;
+	enum dma_data_direction direction;
+	struct host1x_bo *bo;
+	struct sg_table *sgt;
+	unsigned int chunks;
+	struct device *dev;
+	dma_addr_t phys;
+	size_t size;
+};
+
 struct host1x_bo_ops {
 	struct host1x_bo *(*get)(struct host1x_bo *bo);
 	void (*put)(struct host1x_bo *bo);
-	struct sg_table *(*pin)(struct device *dev, struct host1x_bo *bo,
-				dma_addr_t *phys);
-	void (*unpin)(struct device *dev, struct sg_table *sgt);
+	struct host1x_bo_mapping *(*pin)(struct device *dev, struct host1x_bo *bo,
+					 enum dma_data_direction dir);
+	void (*unpin)(struct host1x_bo_mapping *map);
 	void *(*mmap)(struct host1x_bo *bo);
 	void (*munmap)(struct host1x_bo *bo, void *addr);
 };
@@ -112,17 +124,15 @@ static inline void host1x_bo_put(struct host1x_bo *bo)
 	bo->ops->put(bo);
 }
 
-static inline struct sg_table *host1x_bo_pin(struct device *dev,
-					     struct host1x_bo *bo,
-					     dma_addr_t *phys)
+static inline struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo,
+						      enum dma_data_direction dir)
 {
-	return bo->ops->pin(dev, bo, phys);
+	return bo->ops->pin(dev, bo, dir);
 }
 
-static inline void host1x_bo_unpin(struct device *dev, struct host1x_bo *bo,
-				   struct sg_table *sgt)
+static inline void host1x_bo_unpin(struct host1x_bo_mapping *map)
 {
-	bo->ops->unpin(dev, sgt);
+	map->bo->ops->unpin(map);
 }
 
 static inline void *host1x_bo_mmap(struct host1x_bo *bo)
-- 
cgit v1.2.3


From 1f39b1dfa53c84b56d7ad37fed44afda7004959d Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 7 Feb 2020 16:50:52 +0100
Subject: drm/tegra: Implement buffer object cache

This cache is used to avoid mapping and unmapping buffer objects
unnecessarily. Mappings are cached per client and stay hot until
the buffer object is destroyed.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 include/linux/host1x.h | 53 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 157326074df8..d0bb16cdd005 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -8,6 +8,7 @@
 
 #include <linux/device.h>
 #include <linux/dma-direction.h>
+#include <linux/spinlock.h>
 #include <linux/types.h>
 
 enum host1x_class {
@@ -24,6 +25,28 @@ struct iommu_group;
 
 u64 host1x_get_dma_mask(struct host1x *host1x);
 
+/**
+ * struct host1x_bo_cache - host1x buffer object cache
+ * @mappings: list of mappings
+ * @lock: synchronizes accesses to the list of mappings
+ */
+struct host1x_bo_cache {
+	struct list_head mappings;
+	struct mutex lock;
+};
+
+static inline void host1x_bo_cache_init(struct host1x_bo_cache *cache)
+{
+	INIT_LIST_HEAD(&cache->mappings);
+	mutex_init(&cache->lock);
+}
+
+static inline void host1x_bo_cache_destroy(struct host1x_bo_cache *cache)
+{
+	/* XXX warn if not empty? */
+	mutex_destroy(&cache->lock);
+}
+
 /**
  * struct host1x_client_ops - host1x client operations
  * @early_init: host1x client early initialization code
@@ -74,6 +97,8 @@ struct host1x_client {
 	struct host1x_client *parent;
 	unsigned int usecount;
 	struct mutex lock;
+
+	struct host1x_bo_cache cache;
 };
 
 /*
@@ -84,16 +109,26 @@ struct host1x_bo;
 struct sg_table;
 
 struct host1x_bo_mapping {
+	struct kref ref;
 	struct dma_buf_attachment *attach;
 	enum dma_data_direction direction;
+	struct list_head list;
 	struct host1x_bo *bo;
 	struct sg_table *sgt;
 	unsigned int chunks;
 	struct device *dev;
 	dma_addr_t phys;
 	size_t size;
+
+	struct host1x_bo_cache *cache;
+	struct list_head entry;
 };
 
+static inline struct host1x_bo_mapping *to_host1x_bo_mapping(struct kref *ref)
+{
+	return container_of(ref, struct host1x_bo_mapping, ref);
+}
+
 struct host1x_bo_ops {
 	struct host1x_bo *(*get)(struct host1x_bo *bo);
 	void (*put)(struct host1x_bo *bo);
@@ -106,11 +141,15 @@ struct host1x_bo_ops {
 
 struct host1x_bo {
 	const struct host1x_bo_ops *ops;
+	struct list_head mappings;
+	spinlock_t lock;
 };
 
 static inline void host1x_bo_init(struct host1x_bo *bo,
 				  const struct host1x_bo_ops *ops)
 {
+	INIT_LIST_HEAD(&bo->mappings);
+	spin_lock_init(&bo->lock);
 	bo->ops = ops;
 }
 
@@ -124,16 +163,10 @@ static inline void host1x_bo_put(struct host1x_bo *bo)
 	bo->ops->put(bo);
 }
 
-static inline struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo,
-						      enum dma_data_direction dir)
-{
-	return bo->ops->pin(dev, bo, dir);
-}
-
-static inline void host1x_bo_unpin(struct host1x_bo_mapping *map)
-{
-	map->bo->ops->unpin(map);
-}
+struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo,
+					enum dma_data_direction dir,
+					struct host1x_bo_cache *cache);
+void host1x_bo_unpin(struct host1x_bo_mapping *map);
 
 static inline void *host1x_bo_mmap(struct host1x_bo *bo)
 {
-- 
cgit v1.2.3


From 46f226c93d35b936aeec6eb31da932dc2e86f413 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Thu, 16 Sep 2021 17:55:17 +0300
Subject: drm/tegra: Add NVDEC driver

Add support for booting and using NVDEC on Tegra210, Tegra186
and Tegra194 to the Host1x and TegraDRM drivers. Booting in
secure mode is not currently supported.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 include/linux/host1x.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index d0bb16cdd005..2ca53d7ed7ca 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -17,6 +17,8 @@ enum host1x_class {
 	HOST1X_CLASS_GR2D_SB = 0x52,
 	HOST1X_CLASS_VIC = 0x5D,
 	HOST1X_CLASS_GR3D = 0x60,
+	HOST1X_CLASS_NVDEC = 0xF0,
+	HOST1X_CLASS_NVDEC1 = 0xF5,
 };
 
 struct host1x;
-- 
cgit v1.2.3


From 9ca790f44606109071ab1a3a37ed99e91794c37c Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Wed, 1 Dec 2021 02:23:16 +0300
Subject: gpu: host1x: Add host1x_channel_stop()

Add host1x_channel_stop() which waits till channel becomes idle and then
stops the channel hardware. This is needed for supporting suspend/resume
by host1x drivers since the hardware state is lost after power-gating,
thus the channel needs to be stopped before client enters into suspend.

Tested-by: Peter Geis <pgwipeout@gmail.com> # Ouya T30
Tested-by: Paul Fertser <fercerpav@gmail.com> # PAZ00 T20
Tested-by: Nicolas Chauvet <kwizart@gmail.com> # PAZ00 T20 and TK1 T124
Tested-by: Matt Merhar <mattmerhar@protonmail.com> # Ouya T30
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 include/linux/host1x.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 2ca53d7ed7ca..e8dc5bc41f79 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -226,6 +226,7 @@ struct host1x_job;
 
 struct host1x_channel *host1x_channel_request(struct host1x_client *client);
 struct host1x_channel *host1x_channel_get(struct host1x_channel *channel);
+void host1x_channel_stop(struct host1x_channel *channel);
 void host1x_channel_put(struct host1x_channel *channel);
 int host1x_job_submit(struct host1x_job *job);
 
-- 
cgit v1.2.3


From c0cdc89072a3e1ae3981437f385de14b7bba8fd8 Mon Sep 17 00:00:00 2001
From: Valentin Schneider <valentin.schneider@arm.com>
Date: Wed, 27 Oct 2021 16:15:04 +0100
Subject: irqchip/gic-v3-its: Give the percpu rdist struct its own flags field

Later patches will require tracking some per-rdist status. Reuse the bytes
"lost" to padding within the __percpu rdist struct as a flags field, and
re-encode ->lpi_enabled within said flags.

No change in functionality intended.

Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20211027151506.2085066-2-valentin.schneider@arm.com
---
 include/linux/irqchip/arm-gic-v3.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 81cbf85f73de..0dc34d7d735a 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -615,7 +615,7 @@ struct rdists {
 		void __iomem	*rd_base;
 		struct page	*pend_page;
 		phys_addr_t	phys_base;
-		bool		lpi_enabled;
+		u64             flags;
 		cpumask_t	*vpe_table_mask;
 		void		*vpe_l1_base;
 	} __percpu		*rdist;
-- 
cgit v1.2.3


From d23bc2bc1d634658d7fa96395419c1c553a784f0 Mon Sep 17 00:00:00 2001
From: Valentin Schneider <valentin.schneider@arm.com>
Date: Wed, 27 Oct 2021 16:15:05 +0100
Subject: irqchip/gic-v3-its: Postpone LPI pending table freeing and memreserve

Memory used by the LPI tables have to be made persistent for kexec to have
a chance to work, as explained in [1]. If they have been made persistent
and we are booting into a kexec'd kernel, we also need to free the pages
that were preemptively allocated by the new kernel for those tables.

Both of those operations currently happen during its_cpu_init(), which
happens in a _STARTING (IOW atomic) cpuhp callback for secondary
CPUs. efi_mem_reserve_iomem() issues a GFP_ATOMIC allocation, which
unfortunately doesn't work under PREEMPT_RT (this ends up grabbing a
non-raw spinlock, which can sleep under PREEMPT_RT). Similarly, freeing the
pages ends up grabbing a sleepable spinlock.

Since the memreserve is only required by kexec, it doesn't have to be done
so early in the secondary boot process. Issue the reservation in a new
CPUHP_AP_ONLINE_DYN cpuhp callback, and piggy-back the page freeing on top
of it. A CPU gets to run the body of this new callback exactly once.

As kexec issues a machine_shutdown() prior to machine_kexec(), it will be
serialized vs a CPU being plugged to life by the hotplug machinery - either
the CPU will have been brought up and have had its redistributor's pending
table memreserved, or it never went online and will have its table
allocated by the new kernel.

[1]: https://lore.kernel.org/lkml/20180921195954.21574-1-marc.zyngier@arm.com/

Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20211027151506.2085066-3-valentin.schneider@arm.com
---
 include/linux/irqchip/arm-gic-v3.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 0dc34d7d735a..51b85506ae90 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -632,6 +632,7 @@ struct rdists {
 
 struct irq_domain;
 struct fwnode_handle;
+int __init its_lpi_memreserve_init(void);
 int its_cpu_init(void);
 int its_init(struct fwnode_handle *handle, struct rdists *rdists,
 	     struct irq_domain *domain);
-- 
cgit v1.2.3


From 835f442fdbce33a47a6bde356643fd7e3ef7ec1b Mon Sep 17 00:00:00 2001
From: Valentin Schneider <valentin.schneider@arm.com>
Date: Wed, 27 Oct 2021 16:15:06 +0100
Subject: irqchip/gic-v3-its: Limit memreserve cpuhp state lifetime

The new memreserve cpuhp callback only needs to survive up until a point
where every CPU in the system has booted once. Beyond that, it becomes a
no-op and can be put in the bin.

Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20211027151506.2085066-4-valentin.schneider@arm.com
---
 include/linux/irqchip/arm-gic-v3.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 51b85506ae90..12d91f0dedf9 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -624,6 +624,7 @@ struct rdists {
 	u64			flags;
 	u32			gicd_typer;
 	u32			gicd_typer2;
+	int                     cpuhp_memreserve_state;
 	bool			has_vlpis;
 	bool			has_rvpeid;
 	bool			has_direct_lpi;
-- 
cgit v1.2.3


From 92e1bcee067f1722703bf61047ef674be1b6d1c0 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 16 Dec 2021 13:05:06 +0100
Subject: fib: rules: remove duplicated nla policies

The attributes are identical in all implementations so move the ipv4 one
into the core and remove the per-family nla policies.

Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/fib_rules.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index bd07484ab9dd..4183204915dc 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -91,7 +91,6 @@ struct fib_rules_ops {
 	void			(*flush_cache)(struct fib_rules_ops *ops);
 
 	int			nlgroup;
-	const struct nla_policy	*policy;
 	struct list_head	rules_list;
 	struct module		*owner;
 	struct net		*fro_net;
-- 
cgit v1.2.3


From 66495f301c69288a32b9d4bdb60c20ef42d90e6e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 16 Dec 2021 13:05:07 +0100
Subject: fib: expand fib_rule_policy

Now that there is only one fib nla_policy there is no need to
keep the macro around.  Place it where its used.

Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/fib_rules.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include')

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 4183204915dc..82da359bca03 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -102,26 +102,6 @@ struct fib_rule_notifier_info {
 	struct fib_rule *rule;
 };
 
-#define FRA_GENERIC_POLICY \
-	[FRA_UNSPEC]	= { .strict_start_type = FRA_DPORT_RANGE + 1 }, \
-	[FRA_IIFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
-	[FRA_OIFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
-	[FRA_PRIORITY]	= { .type = NLA_U32 }, \
-	[FRA_FWMARK]	= { .type = NLA_U32 }, \
-	[FRA_TUN_ID]	= { .type = NLA_U64 }, \
-	[FRA_FWMASK]	= { .type = NLA_U32 }, \
-	[FRA_TABLE]     = { .type = NLA_U32 }, \
-	[FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \
-	[FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \
-	[FRA_GOTO]	= { .type = NLA_U32 }, \
-	[FRA_L3MDEV]	= { .type = NLA_U8 }, \
-	[FRA_UID_RANGE]	= { .len = sizeof(struct fib_rule_uid_range) }, \
-	[FRA_PROTOCOL]  = { .type = NLA_U8 }, \
-	[FRA_IP_PROTO]  = { .type = NLA_U8 }, \
-	[FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \
-	[FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }
-
-
 static inline void fib_rule_get(struct fib_rule *rule)
 {
 	refcount_inc(&rule->refcnt);
-- 
cgit v1.2.3


From fc5e0e37621973758e7e49dd85f28673e72e4b85 Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Fri, 12 Nov 2021 13:35:34 +0100
Subject: dt-bindings: Update headers for Tegra234

Add a few more clocks that will be used in follow-up patches to enable
more functionality on Tegra234.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 include/dt-bindings/clock/tegra234-clock.h | 17 ++++++++++++++---
 include/dt-bindings/reset/tegra234-reset.h | 12 ++++++++++--
 2 files changed, 24 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/tegra234-clock.h b/include/dt-bindings/clock/tegra234-clock.h
index 2c82072950ee..21ed0c732fb9 100644
--- a/include/dt-bindings/clock/tegra234-clock.h
+++ b/include/dt-bindings/clock/tegra234-clock.h
@@ -4,11 +4,22 @@
 #ifndef DT_BINDINGS_CLOCK_TEGRA234_CLOCK_H
 #define DT_BINDINGS_CLOCK_TEGRA234_CLOCK_H
 
+/**
+ * @file
+ * @defgroup bpmp_clock_ids Clock ID's
+ * @{
+ */
 /** @brief output of gate CLK_ENB_FUSE */
-#define TEGRA234_CLK_FUSE			40
+#define TEGRA234_CLK_FUSE			40U
 /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_SDMMC4 */
-#define TEGRA234_CLK_SDMMC4			123
+#define TEGRA234_CLK_SDMMC4			123U
 /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_UARTA */
-#define TEGRA234_CLK_UARTA			155
+#define TEGRA234_CLK_UARTA			155U
+/** @brief CLK_RST_CONTROLLER_CLK_SOURCE_SDMMC_LEGACY_TM switch divider output */
+#define TEGRA234_CLK_SDMMC_LEGACY_TM		219U
+/** @brief PLL controlled by CLK_RST_CONTROLLER_PLLC4_BASE */
+#define TEGRA234_CLK_PLLC4			237U
+/** @brief 32K input clock provided by PMIC */
+#define TEGRA234_CLK_CLK_32K			289U
 
 #endif
diff --git a/include/dt-bindings/reset/tegra234-reset.h b/include/dt-bindings/reset/tegra234-reset.h
index b3c63be06d2d..50e13bced642 100644
--- a/include/dt-bindings/reset/tegra234-reset.h
+++ b/include/dt-bindings/reset/tegra234-reset.h
@@ -4,7 +4,15 @@
 #ifndef DT_BINDINGS_RESET_TEGRA234_RESET_H
 #define DT_BINDINGS_RESET_TEGRA234_RESET_H
 
-#define TEGRA234_RESET_SDMMC4			85
-#define TEGRA234_RESET_UARTA			100
+/**
+ * @file
+ * @defgroup bpmp_reset_ids Reset ID's
+ * @brief Identifiers for Resets controllable by firmware
+ * @{
+ */
+#define TEGRA234_RESET_SDMMC4			85U
+#define TEGRA234_RESET_UARTA			100U
+
+/** @} */
 
 #endif
-- 
cgit v1.2.3


From c3859c1436e31461f27365ec6fc751c9bbeff3e2 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 13 Dec 2021 17:21:47 +0100
Subject: dt-bindings: memory: tegra: Add Tegra234 support

Document the variant of the memory controller and external memory
controllers found on Tegra234 and add some memory client and SMMU
stream ID definitions for use in device tree files.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 include/dt-bindings/clock/tegra234-clock.h |  9 +++++++++
 include/dt-bindings/memory/tegra234-mc.h   | 32 ++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 include/dt-bindings/memory/tegra234-mc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/tegra234-clock.h b/include/dt-bindings/clock/tegra234-clock.h
index 21ed0c732fb9..8d7e66e1b6ef 100644
--- a/include/dt-bindings/clock/tegra234-clock.h
+++ b/include/dt-bindings/clock/tegra234-clock.h
@@ -9,6 +9,15 @@
  * @defgroup bpmp_clock_ids Clock ID's
  * @{
  */
+/**
+ * @brief controls the EMC clock frequency.
+ * @details Doing a clk_set_rate on this clock will select the
+ * appropriate clock source, program the source rate and execute a
+ * specific sequence to switch to the new clock source for both memory
+ * controllers. This can be used to control the balance between memory
+ * throughput and memory controller power.
+ */
+#define TEGRA234_CLK_EMC			31U
 /** @brief output of gate CLK_ENB_FUSE */
 #define TEGRA234_CLK_FUSE			40U
 /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_SDMMC4 */
diff --git a/include/dt-bindings/memory/tegra234-mc.h b/include/dt-bindings/memory/tegra234-mc.h
new file mode 100644
index 000000000000..2662f70c15c6
--- /dev/null
+++ b/include/dt-bindings/memory/tegra234-mc.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+
+#ifndef DT_BINDINGS_MEMORY_TEGRA234_MC_H
+#define DT_BINDINGS_MEMORY_TEGRA234_MC_H
+
+/* special clients */
+#define TEGRA234_SID_INVALID		0x00
+#define TEGRA234_SID_PASSTHROUGH	0x7f
+
+
+/* NISO1 stream IDs */
+#define TEGRA234_SID_SDMMC4	0x02
+#define TEGRA234_SID_BPMP	0x10
+
+/*
+ * memory client IDs
+ */
+
+/* sdmmcd memory read client */
+#define TEGRA234_MEMORY_CLIENT_SDMMCRAB 0x63
+/* sdmmcd memory write client */
+#define TEGRA234_MEMORY_CLIENT_SDMMCWAB 0x67
+/* BPMP read client */
+#define TEGRA234_MEMORY_CLIENT_BPMPR 0x93
+/* BPMP write client */
+#define TEGRA234_MEMORY_CLIENT_BPMPW 0x94
+/* BPMPDMA read client */
+#define TEGRA234_MEMORY_CLIENT_BPMPDMAR 0x95
+/* BPMPDMA write client */
+#define TEGRA234_MEMORY_CLIENT_BPMPDMAW 0x96
+
+#endif
-- 
cgit v1.2.3


From 3c67d44de787dff288d7f2a51c372b22f7356db6 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 3 Dec 2021 06:48:53 -0700
Subject: block: add mq_ops->queue_rqs hook

If we have a list of requests in our plug list, send it to the driver in
one go, if possible. The driver must set mq_ops->queue_rqs() to support
this, if not the usual one-by-one path is used.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 772f8f921526..550996cf419c 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -492,6 +492,14 @@ struct blk_mq_ops {
 	 */
 	void (*commit_rqs)(struct blk_mq_hw_ctx *);
 
+	/**
+	 * @queue_rqs: Queue a list of new requests. Driver is guaranteed
+	 * that each request belongs to the same queue. If the driver doesn't
+	 * empty the @rqlist completely, then the rest will be queued
+	 * individually by the block layer upon return.
+	 */
+	void (*queue_rqs)(struct request **rqlist);
+
 	/**
 	 * @get_budget: Reserve budget before queue request, once .queue_rq is
 	 * run, it is driver's responsibility to release the
-- 
cgit v1.2.3


From 38bb8a7264daf0ff5bb3024ae94bc465de78203d Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 25 Jun 2021 11:29:51 +0200
Subject: dt-bindings: clock: starfive: Add JH7100 clock definitions

Add all clock outputs for the StarFive JH7100 clock generator.

Based on work by Ahmad Fatoum for Barebox, with "JH7100_" prefixes added
to all definitions.

Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Emil Renner Berthing <kernel@esmil.dk>
---
 include/dt-bindings/clock/starfive-jh7100.h | 202 ++++++++++++++++++++++++++++
 1 file changed, 202 insertions(+)
 create mode 100644 include/dt-bindings/clock/starfive-jh7100.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/starfive-jh7100.h b/include/dt-bindings/clock/starfive-jh7100.h
new file mode 100644
index 000000000000..aa0863b9728d
--- /dev/null
+++ b/include/dt-bindings/clock/starfive-jh7100.h
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2021 Ahmad Fatoum, Pengutronix
+ */
+
+#ifndef __DT_BINDINGS_CLOCK_STARFIVE_JH7100_H__
+#define __DT_BINDINGS_CLOCK_STARFIVE_JH7100_H__
+
+#define JH7100_CLK_CPUNDBUS_ROOT	0
+#define JH7100_CLK_DLA_ROOT		1
+#define JH7100_CLK_DSP_ROOT		2
+#define JH7100_CLK_GMACUSB_ROOT		3
+#define JH7100_CLK_PERH0_ROOT		4
+#define JH7100_CLK_PERH1_ROOT		5
+#define JH7100_CLK_VIN_ROOT		6
+#define JH7100_CLK_VOUT_ROOT		7
+#define JH7100_CLK_AUDIO_ROOT		8
+#define JH7100_CLK_CDECHIFI4_ROOT	9
+#define JH7100_CLK_CDEC_ROOT		10
+#define JH7100_CLK_VOUTBUS_ROOT		11
+#define JH7100_CLK_CPUNBUS_ROOT_DIV	12
+#define JH7100_CLK_DSP_ROOT_DIV		13
+#define JH7100_CLK_PERH0_SRC		14
+#define JH7100_CLK_PERH1_SRC		15
+#define JH7100_CLK_PLL0_TESTOUT		16
+#define JH7100_CLK_PLL1_TESTOUT		17
+#define JH7100_CLK_PLL2_TESTOUT		18
+#define JH7100_CLK_PLL2_REF		19
+#define JH7100_CLK_CPU_CORE		20
+#define JH7100_CLK_CPU_AXI		21
+#define JH7100_CLK_AHB_BUS		22
+#define JH7100_CLK_APB1_BUS		23
+#define JH7100_CLK_APB2_BUS		24
+#define JH7100_CLK_DOM3AHB_BUS		25
+#define JH7100_CLK_DOM7AHB_BUS		26
+#define JH7100_CLK_U74_CORE0		27
+#define JH7100_CLK_U74_CORE1		28
+#define JH7100_CLK_U74_AXI		29
+#define JH7100_CLK_U74RTC_TOGGLE	30
+#define JH7100_CLK_SGDMA2P_AXI		31
+#define JH7100_CLK_DMA2PNOC_AXI		32
+#define JH7100_CLK_SGDMA2P_AHB		33
+#define JH7100_CLK_DLA_BUS		34
+#define JH7100_CLK_DLA_AXI		35
+#define JH7100_CLK_DLANOC_AXI		36
+#define JH7100_CLK_DLA_APB		37
+#define JH7100_CLK_VP6_CORE		38
+#define JH7100_CLK_VP6BUS_SRC		39
+#define JH7100_CLK_VP6_AXI		40
+#define JH7100_CLK_VCDECBUS_SRC		41
+#define JH7100_CLK_VDEC_BUS		42
+#define JH7100_CLK_VDEC_AXI		43
+#define JH7100_CLK_VDECBRG_MAIN		44
+#define JH7100_CLK_VDEC_BCLK		45
+#define JH7100_CLK_VDEC_CCLK		46
+#define JH7100_CLK_VDEC_APB		47
+#define JH7100_CLK_JPEG_AXI		48
+#define JH7100_CLK_JPEG_CCLK		49
+#define JH7100_CLK_JPEG_APB		50
+#define JH7100_CLK_GC300_2X		51
+#define JH7100_CLK_GC300_AHB		52
+#define JH7100_CLK_JPCGC300_AXIBUS	53
+#define JH7100_CLK_GC300_AXI		54
+#define JH7100_CLK_JPCGC300_MAIN	55
+#define JH7100_CLK_VENC_BUS		56
+#define JH7100_CLK_VENC_AXI		57
+#define JH7100_CLK_VENCBRG_MAIN		58
+#define JH7100_CLK_VENC_BCLK		59
+#define JH7100_CLK_VENC_CCLK		60
+#define JH7100_CLK_VENC_APB		61
+#define JH7100_CLK_DDRPLL_DIV2		62
+#define JH7100_CLK_DDRPLL_DIV4		63
+#define JH7100_CLK_DDRPLL_DIV8		64
+#define JH7100_CLK_DDROSC_DIV2		65
+#define JH7100_CLK_DDRC0		66
+#define JH7100_CLK_DDRC1		67
+#define JH7100_CLK_DDRPHY_APB		68
+#define JH7100_CLK_NOC_ROB		69
+#define JH7100_CLK_NOC_COG		70
+#define JH7100_CLK_NNE_AHB		71
+#define JH7100_CLK_NNEBUS_SRC1		72
+#define JH7100_CLK_NNE_BUS		73
+#define JH7100_CLK_NNE_AXI		74
+#define JH7100_CLK_NNENOC_AXI		75
+#define JH7100_CLK_DLASLV_AXI		76
+#define JH7100_CLK_DSPX2C_AXI		77
+#define JH7100_CLK_HIFI4_SRC		78
+#define JH7100_CLK_HIFI4_COREFREE	79
+#define JH7100_CLK_HIFI4_CORE		80
+#define JH7100_CLK_HIFI4_BUS		81
+#define JH7100_CLK_HIFI4_AXI		82
+#define JH7100_CLK_HIFI4NOC_AXI		83
+#define JH7100_CLK_SGDMA1P_BUS		84
+#define JH7100_CLK_SGDMA1P_AXI		85
+#define JH7100_CLK_DMA1P_AXI		86
+#define JH7100_CLK_X2C_AXI		87
+#define JH7100_CLK_USB_BUS		88
+#define JH7100_CLK_USB_AXI		89
+#define JH7100_CLK_USBNOC_AXI		90
+#define JH7100_CLK_USBPHY_ROOTDIV	91
+#define JH7100_CLK_USBPHY_125M		92
+#define JH7100_CLK_USBPHY_PLLDIV25M	93
+#define JH7100_CLK_USBPHY_25M		94
+#define JH7100_CLK_AUDIO_DIV		95
+#define JH7100_CLK_AUDIO_SRC		96
+#define JH7100_CLK_AUDIO_12288		97
+#define JH7100_CLK_VIN_SRC		98
+#define JH7100_CLK_ISP0_BUS		99
+#define JH7100_CLK_ISP0_AXI		100
+#define JH7100_CLK_ISP0NOC_AXI		101
+#define JH7100_CLK_ISPSLV_AXI		102
+#define JH7100_CLK_ISP1_BUS		103
+#define JH7100_CLK_ISP1_AXI		104
+#define JH7100_CLK_ISP1NOC_AXI		105
+#define JH7100_CLK_VIN_BUS		106
+#define JH7100_CLK_VIN_AXI		107
+#define JH7100_CLK_VINNOC_AXI		108
+#define JH7100_CLK_VOUT_SRC		109
+#define JH7100_CLK_DISPBUS_SRC		110
+#define JH7100_CLK_DISP_BUS		111
+#define JH7100_CLK_DISP_AXI		112
+#define JH7100_CLK_DISPNOC_AXI		113
+#define JH7100_CLK_SDIO0_AHB		114
+#define JH7100_CLK_SDIO0_CCLKINT	115
+#define JH7100_CLK_SDIO0_CCLKINT_INV	116
+#define JH7100_CLK_SDIO1_AHB		117
+#define JH7100_CLK_SDIO1_CCLKINT	118
+#define JH7100_CLK_SDIO1_CCLKINT_INV	119
+#define JH7100_CLK_GMAC_AHB		120
+#define JH7100_CLK_GMAC_ROOT_DIV	121
+#define JH7100_CLK_GMAC_PTP_REF		122
+#define JH7100_CLK_GMAC_GTX		123
+#define JH7100_CLK_GMAC_RMII_TX		124
+#define JH7100_CLK_GMAC_RMII_RX		125
+#define JH7100_CLK_GMAC_TX		126
+#define JH7100_CLK_GMAC_TX_INV		127
+#define JH7100_CLK_GMAC_RX_PRE		128
+#define JH7100_CLK_GMAC_RX_INV		129
+#define JH7100_CLK_GMAC_RMII		130
+#define JH7100_CLK_GMAC_TOPHYREF	131
+#define JH7100_CLK_SPI2AHB_AHB		132
+#define JH7100_CLK_SPI2AHB_CORE		133
+#define JH7100_CLK_EZMASTER_AHB		134
+#define JH7100_CLK_E24_AHB		135
+#define JH7100_CLK_E24RTC_TOGGLE	136
+#define JH7100_CLK_QSPI_AHB		137
+#define JH7100_CLK_QSPI_APB		138
+#define JH7100_CLK_QSPI_REF		139
+#define JH7100_CLK_SEC_AHB		140
+#define JH7100_CLK_AES			141
+#define JH7100_CLK_SHA			142
+#define JH7100_CLK_PKA			143
+#define JH7100_CLK_TRNG_APB		144
+#define JH7100_CLK_OTP_APB		145
+#define JH7100_CLK_UART0_APB		146
+#define JH7100_CLK_UART0_CORE		147
+#define JH7100_CLK_UART1_APB		148
+#define JH7100_CLK_UART1_CORE		149
+#define JH7100_CLK_SPI0_APB		150
+#define JH7100_CLK_SPI0_CORE		151
+#define JH7100_CLK_SPI1_APB		152
+#define JH7100_CLK_SPI1_CORE		153
+#define JH7100_CLK_I2C0_APB		154
+#define JH7100_CLK_I2C0_CORE		155
+#define JH7100_CLK_I2C1_APB		156
+#define JH7100_CLK_I2C1_CORE		157
+#define JH7100_CLK_GPIO_APB		158
+#define JH7100_CLK_UART2_APB		159
+#define JH7100_CLK_UART2_CORE		160
+#define JH7100_CLK_UART3_APB		161
+#define JH7100_CLK_UART3_CORE		162
+#define JH7100_CLK_SPI2_APB		163
+#define JH7100_CLK_SPI2_CORE		164
+#define JH7100_CLK_SPI3_APB		165
+#define JH7100_CLK_SPI3_CORE		166
+#define JH7100_CLK_I2C2_APB		167
+#define JH7100_CLK_I2C2_CORE		168
+#define JH7100_CLK_I2C3_APB		169
+#define JH7100_CLK_I2C3_CORE		170
+#define JH7100_CLK_WDTIMER_APB		171
+#define JH7100_CLK_WDT_CORE		172
+#define JH7100_CLK_TIMER0_CORE		173
+#define JH7100_CLK_TIMER1_CORE		174
+#define JH7100_CLK_TIMER2_CORE		175
+#define JH7100_CLK_TIMER3_CORE		176
+#define JH7100_CLK_TIMER4_CORE		177
+#define JH7100_CLK_TIMER5_CORE		178
+#define JH7100_CLK_TIMER6_CORE		179
+#define JH7100_CLK_VP6INTC_APB		180
+#define JH7100_CLK_PWM_APB		181
+#define JH7100_CLK_MSI_APB		182
+#define JH7100_CLK_TEMP_APB		183
+#define JH7100_CLK_TEMP_SENSE		184
+#define JH7100_CLK_SYSERR_APB		185
+
+#define JH7100_CLK_PLL0_OUT		186
+#define JH7100_CLK_PLL1_OUT		187
+#define JH7100_CLK_PLL2_OUT		188
+
+#define JH7100_CLK_END			189
+
+#endif /* __DT_BINDINGS_CLOCK_STARFIVE_JH7100_H__ */
-- 
cgit v1.2.3


From 810e287e83b69ff8563bde15cae9120c802ac5d7 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 25 Jun 2021 11:30:00 +0200
Subject: dt-bindings: reset: Add StarFive JH7100 reset definitions

Add all resets for the StarFive JH7100 reset controller.

Based on work by Ahmad Fatoum for Barebox, with "JH7100_" prefixes added
to all definitions.

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Emil Renner Berthing <kernel@esmil.dk>
---
 include/dt-bindings/reset/starfive-jh7100.h | 126 ++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 include/dt-bindings/reset/starfive-jh7100.h

(limited to 'include')

diff --git a/include/dt-bindings/reset/starfive-jh7100.h b/include/dt-bindings/reset/starfive-jh7100.h
new file mode 100644
index 000000000000..540e19254f39
--- /dev/null
+++ b/include/dt-bindings/reset/starfive-jh7100.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2021 Ahmad Fatoum, Pengutronix
+ */
+
+#ifndef __DT_BINDINGS_RESET_STARFIVE_JH7100_H__
+#define __DT_BINDINGS_RESET_STARFIVE_JH7100_H__
+
+#define JH7100_RSTN_DOM3AHB_BUS		0
+#define JH7100_RSTN_DOM7AHB_BUS		1
+#define JH7100_RST_U74			2
+#define JH7100_RSTN_U74_AXI		3
+#define JH7100_RSTN_SGDMA2P_AHB		4
+#define JH7100_RSTN_SGDMA2P_AXI		5
+#define JH7100_RSTN_DMA2PNOC_AXI	6
+#define JH7100_RSTN_DLA_AXI		7
+#define JH7100_RSTN_DLANOC_AXI		8
+#define JH7100_RSTN_DLA_APB		9
+#define JH7100_RST_VP6_DRESET		10
+#define JH7100_RST_VP6_BRESET		11
+#define JH7100_RSTN_VP6_AXI		12
+#define JH7100_RSTN_VDECBRG_MAIN	13
+#define JH7100_RSTN_VDEC_AXI		14
+#define JH7100_RSTN_VDEC_BCLK		15
+#define JH7100_RSTN_VDEC_CCLK		16
+#define JH7100_RSTN_VDEC_APB		17
+#define JH7100_RSTN_JPEG_AXI		18
+#define JH7100_RSTN_JPEG_CCLK		19
+#define JH7100_RSTN_JPEG_APB		20
+#define JH7100_RSTN_JPCGC300_MAIN	21
+#define JH7100_RSTN_GC300_2X		22
+#define JH7100_RSTN_GC300_AXI		23
+#define JH7100_RSTN_GC300_AHB		24
+#define JH7100_RSTN_VENC_AXI		25
+#define JH7100_RSTN_VENCBRG_MAIN	26
+#define JH7100_RSTN_VENC_BCLK		27
+#define JH7100_RSTN_VENC_CCLK		28
+#define JH7100_RSTN_VENC_APB		29
+#define JH7100_RSTN_DDRPHY_APB		30
+#define JH7100_RSTN_NOC_ROB		31
+#define JH7100_RSTN_NOC_COG		32
+#define JH7100_RSTN_HIFI4_AXI		33
+#define JH7100_RSTN_HIFI4NOC_AXI	34
+#define JH7100_RST_HIFI4_DRESET		35
+#define JH7100_RST_HIFI4_BRESET		36
+#define JH7100_RSTN_USB_AXI		37
+#define JH7100_RSTN_USBNOC_AXI		38
+#define JH7100_RSTN_SGDMA1P_AXI		39
+#define JH7100_RSTN_DMA1P_AXI		40
+#define JH7100_RSTN_X2C_AXI		41
+#define JH7100_RSTN_NNE_AHB		42
+#define JH7100_RSTN_NNE_AXI		43
+#define JH7100_RSTN_NNENOC_AXI		44
+#define JH7100_RSTN_DLASLV_AXI		45
+#define JH7100_RSTN_DSPX2C_AXI		46
+#define JH7100_RSTN_VIN_SRC		47
+#define JH7100_RSTN_ISPSLV_AXI		48
+#define JH7100_RSTN_VIN_AXI		49
+#define JH7100_RSTN_VINNOC_AXI		50
+#define JH7100_RSTN_ISP0_AXI		51
+#define JH7100_RSTN_ISP0NOC_AXI		52
+#define JH7100_RSTN_ISP1_AXI		53
+#define JH7100_RSTN_ISP1NOC_AXI		54
+#define JH7100_RSTN_VOUT_SRC		55
+#define JH7100_RSTN_DISP_AXI		56
+#define JH7100_RSTN_DISPNOC_AXI		57
+#define JH7100_RSTN_SDIO0_AHB		58
+#define JH7100_RSTN_SDIO1_AHB		59
+#define JH7100_RSTN_GMAC_AHB		60
+#define JH7100_RSTN_SPI2AHB_AHB		61
+#define JH7100_RSTN_SPI2AHB_CORE	62
+#define JH7100_RSTN_EZMASTER_AHB	63
+#define JH7100_RST_E24			64
+#define JH7100_RSTN_QSPI_AHB		65
+#define JH7100_RSTN_QSPI_CORE		66
+#define JH7100_RSTN_QSPI_APB		67
+#define JH7100_RSTN_SEC_AHB		68
+#define JH7100_RSTN_AES			69
+#define JH7100_RSTN_PKA			70
+#define JH7100_RSTN_SHA			71
+#define JH7100_RSTN_TRNG_APB		72
+#define JH7100_RSTN_OTP_APB		73
+#define JH7100_RSTN_UART0_APB		74
+#define JH7100_RSTN_UART0_CORE		75
+#define JH7100_RSTN_UART1_APB		76
+#define JH7100_RSTN_UART1_CORE		77
+#define JH7100_RSTN_SPI0_APB		78
+#define JH7100_RSTN_SPI0_CORE		79
+#define JH7100_RSTN_SPI1_APB		80
+#define JH7100_RSTN_SPI1_CORE		81
+#define JH7100_RSTN_I2C0_APB		82
+#define JH7100_RSTN_I2C0_CORE		83
+#define JH7100_RSTN_I2C1_APB		84
+#define JH7100_RSTN_I2C1_CORE		85
+#define JH7100_RSTN_GPIO_APB		86
+#define JH7100_RSTN_UART2_APB		87
+#define JH7100_RSTN_UART2_CORE		88
+#define JH7100_RSTN_UART3_APB		89
+#define JH7100_RSTN_UART3_CORE		90
+#define JH7100_RSTN_SPI2_APB		91
+#define JH7100_RSTN_SPI2_CORE		92
+#define JH7100_RSTN_SPI3_APB		93
+#define JH7100_RSTN_SPI3_CORE		94
+#define JH7100_RSTN_I2C2_APB		95
+#define JH7100_RSTN_I2C2_CORE		96
+#define JH7100_RSTN_I2C3_APB		97
+#define JH7100_RSTN_I2C3_CORE		98
+#define JH7100_RSTN_WDTIMER_APB		99
+#define JH7100_RSTN_WDT			100
+#define JH7100_RSTN_TIMER0		101
+#define JH7100_RSTN_TIMER1		102
+#define JH7100_RSTN_TIMER2		103
+#define JH7100_RSTN_TIMER3		104
+#define JH7100_RSTN_TIMER4		105
+#define JH7100_RSTN_TIMER5		106
+#define JH7100_RSTN_TIMER6		107
+#define JH7100_RSTN_VP6INTC_APB		108
+#define JH7100_RSTN_PWM_APB		109
+#define JH7100_RSTN_MSI_APB		110
+#define JH7100_RSTN_TEMP_APB		111
+#define JH7100_RSTN_TEMP_SENSE		112
+#define JH7100_RSTN_SYSERR_APB		113
+
+#define JH7100_RSTN_END			114
+
+#endif /* __DT_BINDINGS_RESET_STARFIVE_JH7100_H__ */
-- 
cgit v1.2.3


From 3021114b3d172cf80c074c81425741f9e26c6679 Mon Sep 17 00:00:00 2001
From: Emil Renner Berthing <kernel@esmil.dk>
Date: Tue, 6 Jul 2021 20:19:06 +0200
Subject: dt-bindings: pinctrl: Add StarFive pinctrl definitions

Add definitons for pins and GPIO input, output and output enable
signals on the StarFive JH7100 SoC.

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Emil Renner Berthing <kernel@esmil.dk>
---
 include/dt-bindings/pinctrl/pinctrl-starfive.h | 275 +++++++++++++++++++++++++
 1 file changed, 275 insertions(+)
 create mode 100644 include/dt-bindings/pinctrl/pinctrl-starfive.h

(limited to 'include')

diff --git a/include/dt-bindings/pinctrl/pinctrl-starfive.h b/include/dt-bindings/pinctrl/pinctrl-starfive.h
new file mode 100644
index 000000000000..de4f75c2c9e8
--- /dev/null
+++ b/include/dt-bindings/pinctrl/pinctrl-starfive.h
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2021 Emil Renner Berthing <kernel@esmil.dk>
+ */
+
+#ifndef __DT_BINDINGS_PINCTRL_STARFIVE_H__
+#define __DT_BINDINGS_PINCTRL_STARFIVE_H__
+
+#define PAD_GPIO_OFFSET		0
+#define PAD_FUNC_SHARE_OFFSET	64
+#define PAD_GPIO(x)		(PAD_GPIO_OFFSET + (x))
+#define PAD_FUNC_SHARE(x)	(PAD_FUNC_SHARE_OFFSET + (x))
+
+/*
+ * GPIOMUX bits:
+ *  | 31 - 24 | 23 - 16 | 15 - 8 |     7    |     6    |  5 - 0  |
+ *  |  dout   |  doen   |  din   | dout rev | doen rev | gpio nr |
+ *
+ * dout:     output signal
+ * doen:     output enable signal
+ * din:      optional input signal, 0xff = none
+ * dout rev: output signal reverse bit
+ * doen rev: output enable signal reverse bit
+ * gpio nr:  gpio number, 0 - 63
+ */
+#define GPIOMUX(n, dout, doen, din) ( \
+		(((dout) & 0x80000000) >> (31 - 7)) | (((dout) & 0xff) << 24) | \
+		(((doen) & 0x80000000) >> (31 - 6)) | (((doen) & 0xff) << 16) | \
+		(((din) & 0xff) << 8) | \
+		((n) & 0x3f))
+
+#define GPO_REVERSE				0x80000000
+
+#define GPO_LOW					0
+#define GPO_HIGH				1
+#define GPO_ENABLE				0
+#define GPO_DISABLE				1
+#define GPO_CLK_GMAC_PAPHYREF			2
+#define GPO_JTAG_TDO				3
+#define GPO_JTAG_TDO_OEN			4
+#define GPO_DMIC_CLK_OUT			5
+#define GPO_DSP_JTDOEN_PAD			6
+#define GPO_DSP_JTDO_PAD			7
+#define GPO_I2C0_PAD_SCK_OE			8
+#define GPO_I2C0_PAD_SCK_OEN			(GPO_I2C0_PAD_SCK_OE | GPO_REVERSE)
+#define GPO_I2C0_PAD_SDA_OE			9
+#define GPO_I2C0_PAD_SDA_OEN			(GPO_I2C0_PAD_SDA_OE | GPO_REVERSE)
+#define GPO_I2C1_PAD_SCK_OE			10
+#define GPO_I2C1_PAD_SCK_OEN			(GPO_I2C1_PAD_SCK_OE | GPO_REVERSE)
+#define GPO_I2C1_PAD_SDA_OE			11
+#define GPO_I2C1_PAD_SDA_OEN			(GPO_I2C1_PAD_SDA_OE | GPO_REVERSE)
+#define GPO_I2C2_PAD_SCK_OE			12
+#define GPO_I2C2_PAD_SCK_OEN			(GPO_I2C2_PAD_SCK_OE | GPO_REVERSE)
+#define GPO_I2C2_PAD_SDA_OE			13
+#define GPO_I2C2_PAD_SDA_OEN			(GPO_I2C2_PAD_SDA_OE | GPO_REVERSE)
+#define GPO_I2C3_PAD_SCK_OE			14
+#define GPO_I2C3_PAD_SCK_OEN			(GPO_I2C3_PAD_SCK_OE | GPO_REVERSE)
+#define GPO_I2C3_PAD_SDA_OE			15
+#define GPO_I2C3_PAD_SDA_OEN			(GPO_I2C3_PAD_SDA_OE | GPO_REVERSE)
+#define GPO_I2SRX_BCLK_OUT			16
+#define GPO_I2SRX_BCLK_OUT_OEN			17
+#define GPO_I2SRX_LRCK_OUT			18
+#define GPO_I2SRX_LRCK_OUT_OEN			19
+#define GPO_I2SRX_MCLK_OUT			20
+#define GPO_I2STX_BCLK_OUT			21
+#define GPO_I2STX_BCLK_OUT_OEN			22
+#define GPO_I2STX_LRCK_OUT			23
+#define GPO_I2STX_LRCK_OUT_OEN			24
+#define GPO_I2STX_MCLK_OUT			25
+#define GPO_I2STX_SDOUT0			26
+#define GPO_I2STX_SDOUT1			27
+#define GPO_LCD_PAD_CSM_N			28
+#define GPO_PWM_PAD_OE_N_BIT0			29
+#define GPO_PWM_PAD_OE_N_BIT1			30
+#define GPO_PWM_PAD_OE_N_BIT2			31
+#define GPO_PWM_PAD_OE_N_BIT3			32
+#define GPO_PWM_PAD_OE_N_BIT4			33
+#define GPO_PWM_PAD_OE_N_BIT5			34
+#define GPO_PWM_PAD_OE_N_BIT6			35
+#define GPO_PWM_PAD_OE_N_BIT7			36
+#define GPO_PWM_PAD_OUT_BIT0			37
+#define GPO_PWM_PAD_OUT_BIT1			38
+#define GPO_PWM_PAD_OUT_BIT2			39
+#define GPO_PWM_PAD_OUT_BIT3			40
+#define GPO_PWM_PAD_OUT_BIT4			41
+#define GPO_PWM_PAD_OUT_BIT5			42
+#define GPO_PWM_PAD_OUT_BIT6			43
+#define GPO_PWM_PAD_OUT_BIT7			44
+#define GPO_PWMDAC_LEFT_OUT			45
+#define GPO_PWMDAC_RIGHT_OUT			46
+#define GPO_QSPI_CSN1_OUT			47
+#define GPO_QSPI_CSN2_OUT			48
+#define GPO_QSPI_CSN3_OUT			49
+#define GPO_REGISTER23_SCFG_CMSENSOR_RST0	50
+#define GPO_REGISTER23_SCFG_CMSENSOR_RST1	51
+#define GPO_REGISTER32_SCFG_GMAC_PHY_RSTN	52
+#define GPO_SDIO0_PAD_CARD_POWER_EN		53
+#define GPO_SDIO0_PAD_CCLK_OUT			54
+#define GPO_SDIO0_PAD_CCMD_OE			55
+#define GPO_SDIO0_PAD_CCMD_OEN			(GPO_SDIO0_PAD_CCMD_OE | GPO_REVERSE)
+#define GPO_SDIO0_PAD_CCMD_OUT			56
+#define GPO_SDIO0_PAD_CDATA_OE_BIT0		57
+#define GPO_SDIO0_PAD_CDATA_OEN_BIT0		(GPO_SDIO0_PAD_CDATA_OE_BIT0 | GPO_REVERSE)
+#define GPO_SDIO0_PAD_CDATA_OE_BIT1		58
+#define GPO_SDIO0_PAD_CDATA_OEN_BIT1		(GPO_SDIO0_PAD_CDATA_OE_BIT1 | GPO_REVERSE)
+#define GPO_SDIO0_PAD_CDATA_OE_BIT2		59
+#define GPO_SDIO0_PAD_CDATA_OEN_BIT2		(GPO_SDIO0_PAD_CDATA_OE_BIT2 | GPO_REVERSE)
+#define GPO_SDIO0_PAD_CDATA_OE_BIT3		60
+#define GPO_SDIO0_PAD_CDATA_OEN_BIT3		(GPO_SDIO0_PAD_CDATA_OE_BIT3 | GPO_REVERSE)
+#define GPO_SDIO0_PAD_CDATA_OE_BIT4		61
+#define GPO_SDIO0_PAD_CDATA_OEN_BIT4		(GPO_SDIO0_PAD_CDATA_OE_BIT4 | GPO_REVERSE)
+#define GPO_SDIO0_PAD_CDATA_OE_BIT5		62
+#define GPO_SDIO0_PAD_CDATA_OEN_BIT5		(GPO_SDIO0_PAD_CDATA_OE_BIT5 | GPO_REVERSE)
+#define GPO_SDIO0_PAD_CDATA_OE_BIT6		63
+#define GPO_SDIO0_PAD_CDATA_OEN_BIT6		(GPO_SDIO0_PAD_CDATA_OE_BIT6 | GPO_REVERSE)
+#define GPO_SDIO0_PAD_CDATA_OE_BIT7		64
+#define GPO_SDIO0_PAD_CDATA_OEN_BIT7		(GPO_SDIO0_PAD_CDATA_OE_BIT7 | GPO_REVERSE)
+#define GPO_SDIO0_PAD_CDATA_OUT_BIT0		65
+#define GPO_SDIO0_PAD_CDATA_OUT_BIT1		66
+#define GPO_SDIO0_PAD_CDATA_OUT_BIT2		67
+#define GPO_SDIO0_PAD_CDATA_OUT_BIT3		68
+#define GPO_SDIO0_PAD_CDATA_OUT_BIT4		69
+#define GPO_SDIO0_PAD_CDATA_OUT_BIT5		70
+#define GPO_SDIO0_PAD_CDATA_OUT_BIT6		71
+#define GPO_SDIO0_PAD_CDATA_OUT_BIT7		72
+#define GPO_SDIO0_PAD_RST_N			73
+#define GPO_SDIO1_PAD_CARD_POWER_EN		74
+#define GPO_SDIO1_PAD_CCLK_OUT			75
+#define GPO_SDIO1_PAD_CCMD_OE			76
+#define GPO_SDIO1_PAD_CCMD_OEN			(GPO_SDIO1_PAD_CCMD_OE | GPO_REVERSE)
+#define GPO_SDIO1_PAD_CCMD_OUT			77
+#define GPO_SDIO1_PAD_CDATA_OE_BIT0		78
+#define GPO_SDIO1_PAD_CDATA_OEN_BIT0		(GPO_SDIO1_PAD_CDATA_OE_BIT0 | GPO_REVERSE)
+#define GPO_SDIO1_PAD_CDATA_OE_BIT1		79
+#define GPO_SDIO1_PAD_CDATA_OEN_BIT1		(GPO_SDIO1_PAD_CDATA_OE_BIT1 | GPO_REVERSE)
+#define GPO_SDIO1_PAD_CDATA_OE_BIT2		80
+#define GPO_SDIO1_PAD_CDATA_OEN_BIT2		(GPO_SDIO1_PAD_CDATA_OE_BIT2 | GPO_REVERSE)
+#define GPO_SDIO1_PAD_CDATA_OE_BIT3		81
+#define GPO_SDIO1_PAD_CDATA_OEN_BIT3		(GPO_SDIO1_PAD_CDATA_OE_BIT3 | GPO_REVERSE)
+#define GPO_SDIO1_PAD_CDATA_OE_BIT4		82
+#define GPO_SDIO1_PAD_CDATA_OEN_BIT4		(GPO_SDIO1_PAD_CDATA_OE_BIT4 | GPO_REVERSE)
+#define GPO_SDIO1_PAD_CDATA_OE_BIT5		83
+#define GPO_SDIO1_PAD_CDATA_OEN_BIT5		(GPO_SDIO1_PAD_CDATA_OE_BIT5 | GPO_REVERSE)
+#define GPO_SDIO1_PAD_CDATA_OE_BIT6		84
+#define GPO_SDIO1_PAD_CDATA_OEN_BIT6		(GPO_SDIO1_PAD_CDATA_OE_BIT6 | GPO_REVERSE)
+#define GPO_SDIO1_PAD_CDATA_OE_BIT7		85
+#define GPO_SDIO1_PAD_CDATA_OEN_BIT7		(GPO_SDIO1_PAD_CDATA_OE_BIT7 | GPO_REVERSE)
+#define GPO_SDIO1_PAD_CDATA_OUT_BIT0		86
+#define GPO_SDIO1_PAD_CDATA_OUT_BIT1		87
+#define GPO_SDIO1_PAD_CDATA_OUT_BIT2		88
+#define GPO_SDIO1_PAD_CDATA_OUT_BIT3		89
+#define GPO_SDIO1_PAD_CDATA_OUT_BIT4		90
+#define GPO_SDIO1_PAD_CDATA_OUT_BIT5		91
+#define GPO_SDIO1_PAD_CDATA_OUT_BIT6		92
+#define GPO_SDIO1_PAD_CDATA_OUT_BIT7		93
+#define GPO_SDIO1_PAD_RST_N			94
+#define GPO_SPDIF_TX_SDOUT			95
+#define GPO_SPDIF_TX_SDOUT_OEN			96
+#define GPO_SPI0_PAD_OE_N			97
+#define GPO_SPI0_PAD_SCK_OUT			98
+#define GPO_SPI0_PAD_SS_0_N			99
+#define GPO_SPI0_PAD_SS_1_N			100
+#define GPO_SPI0_PAD_TXD			101
+#define GPO_SPI1_PAD_OE_N			102
+#define GPO_SPI1_PAD_SCK_OUT			103
+#define GPO_SPI1_PAD_SS_0_N			104
+#define GPO_SPI1_PAD_SS_1_N			105
+#define GPO_SPI1_PAD_TXD			106
+#define GPO_SPI2_PAD_OE_N			107
+#define GPO_SPI2_PAD_SCK_OUT			108
+#define GPO_SPI2_PAD_SS_0_N			109
+#define GPO_SPI2_PAD_SS_1_N			110
+#define GPO_SPI2_PAD_TXD			111
+#define GPO_SPI2AHB_PAD_OE_N_BIT0		112
+#define GPO_SPI2AHB_PAD_OE_N_BIT1		113
+#define GPO_SPI2AHB_PAD_OE_N_BIT2		114
+#define GPO_SPI2AHB_PAD_OE_N_BIT3		115
+#define GPO_SPI2AHB_PAD_TXD_BIT0		116
+#define GPO_SPI2AHB_PAD_TXD_BIT1		117
+#define GPO_SPI2AHB_PAD_TXD_BIT2		118
+#define GPO_SPI2AHB_PAD_TXD_BIT3		119
+#define GPO_SPI3_PAD_OE_N			120
+#define GPO_SPI3_PAD_SCK_OUT			121
+#define GPO_SPI3_PAD_SS_0_N			122
+#define GPO_SPI3_PAD_SS_1_N			123
+#define GPO_SPI3_PAD_TXD			124
+#define GPO_UART0_PAD_DTRN			125
+#define GPO_UART0_PAD_RTSN			126
+#define GPO_UART0_PAD_SOUT			127
+#define GPO_UART1_PAD_SOUT			128
+#define GPO_UART2_PAD_DTR_N			129
+#define GPO_UART2_PAD_RTS_N			130
+#define GPO_UART2_PAD_SOUT			131
+#define GPO_UART3_PAD_SOUT			132
+#define GPO_USB_DRV_BUS				133
+
+#define GPI_CPU_JTAG_TCK			0
+#define GPI_CPU_JTAG_TDI			1
+#define GPI_CPU_JTAG_TMS			2
+#define GPI_CPU_JTAG_TRST			3
+#define GPI_DMIC_SDIN_BIT0			4
+#define GPI_DMIC_SDIN_BIT1			5
+#define GPI_DSP_JTCK_PAD			6
+#define GPI_DSP_JTDI_PAD			7
+#define GPI_DSP_JTMS_PAD			8
+#define GPI_DSP_TRST_PAD			9
+#define GPI_I2C0_PAD_SCK_IN			10
+#define GPI_I2C0_PAD_SDA_IN			11
+#define GPI_I2C1_PAD_SCK_IN			12
+#define GPI_I2C1_PAD_SDA_IN			13
+#define GPI_I2C2_PAD_SCK_IN			14
+#define GPI_I2C2_PAD_SDA_IN			15
+#define GPI_I2C3_PAD_SCK_IN			16
+#define GPI_I2C3_PAD_SDA_IN			17
+#define GPI_I2SRX_BCLK_IN			18
+#define GPI_I2SRX_LRCK_IN			19
+#define GPI_I2SRX_SDIN_BIT0			20
+#define GPI_I2SRX_SDIN_BIT1			21
+#define GPI_I2SRX_SDIN_BIT2			22
+#define GPI_I2STX_BCLK_IN			23
+#define GPI_I2STX_LRCK_IN			24
+#define GPI_SDIO0_PAD_CARD_DETECT_N		25
+#define GPI_SDIO0_PAD_CARD_WRITE_PRT		26
+#define GPI_SDIO0_PAD_CCMD_IN			27
+#define GPI_SDIO0_PAD_CDATA_IN_BIT0		28
+#define GPI_SDIO0_PAD_CDATA_IN_BIT1		29
+#define GPI_SDIO0_PAD_CDATA_IN_BIT2		30
+#define GPI_SDIO0_PAD_CDATA_IN_BIT3		31
+#define GPI_SDIO0_PAD_CDATA_IN_BIT4		32
+#define GPI_SDIO0_PAD_CDATA_IN_BIT5		33
+#define GPI_SDIO0_PAD_CDATA_IN_BIT6		34
+#define GPI_SDIO0_PAD_CDATA_IN_BIT7		35
+#define GPI_SDIO1_PAD_CARD_DETECT_N		36
+#define GPI_SDIO1_PAD_CARD_WRITE_PRT		37
+#define GPI_SDIO1_PAD_CCMD_IN			38
+#define GPI_SDIO1_PAD_CDATA_IN_BIT0		39
+#define GPI_SDIO1_PAD_CDATA_IN_BIT1		40
+#define GPI_SDIO1_PAD_CDATA_IN_BIT2		41
+#define GPI_SDIO1_PAD_CDATA_IN_BIT3		42
+#define GPI_SDIO1_PAD_CDATA_IN_BIT4		43
+#define GPI_SDIO1_PAD_CDATA_IN_BIT5		44
+#define GPI_SDIO1_PAD_CDATA_IN_BIT6		45
+#define GPI_SDIO1_PAD_CDATA_IN_BIT7		46
+#define GPI_SPDIF_RX_SDIN			47
+#define GPI_SPI0_PAD_RXD			48
+#define GPI_SPI0_PAD_SS_IN_N			49
+#define GPI_SPI1_PAD_RXD			50
+#define GPI_SPI1_PAD_SS_IN_N			51
+#define GPI_SPI2_PAD_RXD			52
+#define GPI_SPI2_PAD_SS_IN_N			53
+#define GPI_SPI2AHB_PAD_RXD_BIT0		54
+#define GPI_SPI2AHB_PAD_RXD_BIT1		55
+#define GPI_SPI2AHB_PAD_RXD_BIT2		56
+#define GPI_SPI2AHB_PAD_RXD_BIT3		57
+#define GPI_SPI2AHB_PAD_SS_N			58
+#define GPI_SPI2AHB_SLV_SCLKIN			59
+#define GPI_SPI3_PAD_RXD			60
+#define GPI_SPI3_PAD_SS_IN_N			61
+#define GPI_UART0_PAD_CTSN			62
+#define GPI_UART0_PAD_DCDN			63
+#define GPI_UART0_PAD_DSRN			64
+#define GPI_UART0_PAD_RIN			65
+#define GPI_UART0_PAD_SIN			66
+#define GPI_UART1_PAD_SIN			67
+#define GPI_UART2_PAD_CTS_N			68
+#define GPI_UART2_PAD_DCD_N			69
+#define GPI_UART2_PAD_DSR_N			70
+#define GPI_UART2_PAD_RI_N			71
+#define GPI_UART2_PAD_SIN			72
+#define GPI_UART3_PAD_SIN			73
+#define GPI_USB_OVER_CURRENT			74
+
+#define GPI_NONE				0xff
+
+#endif /* __DT_BINDINGS_PINCTRL_STARFIVE_H__ */
-- 
cgit v1.2.3


From 8a2ba1785c5803d59a63b6320ff54fd4a37a41ce Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 9 Dec 2021 07:31:21 +0100
Subject: block: remove the nr_task field from struct io_context

Nothing ever looks at ->nr_tasks, so remove it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20211209063131.18537-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/iocontext.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index c1229fbd6691..82c7f4f5f4f5 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -99,7 +99,6 @@ struct io_cq {
 struct io_context {
 	atomic_long_t refcount;
 	atomic_t active_ref;
-	atomic_t nr_tasks;
 
 	/* all the fields below are protected by this lock */
 	spinlock_t lock;
-- 
cgit v1.2.3


From a411cd3cfdc5bbd1329d5b33dbf39e2b5213969d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 9 Dec 2021 07:31:27 +0100
Subject: block: move set_task_ioprio to blk-ioc.c

Keep set_task_ioprio with the other low-level code that accesses the
io_context structure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20211209063131.18537-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/iocontext.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 82c7f4f5f4f5..648331f35fc6 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -116,8 +116,6 @@ struct task_struct;
 #ifdef CONFIG_BLOCK
 void put_io_context(struct io_context *ioc);
 void exit_io_context(struct task_struct *task);
-struct io_context *get_task_io_context(struct task_struct *task,
-				       gfp_t gfp_flags, int node);
 int __copy_io(unsigned long clone_flags, struct task_struct *tsk);
 static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk)
 {
-- 
cgit v1.2.3


From 5ef1630586317e92c9ebd7b4ce48f393b7ff790f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 9 Dec 2021 07:31:31 +0100
Subject: block: only build the icq tracking code when needed

Only bfq needs to code to track icq, so make it conditional.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20211209063131.18537-12-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/iocontext.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 648331f35fc6..14f7eaf1b443 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -100,16 +100,18 @@ struct io_context {
 	atomic_long_t refcount;
 	atomic_t active_ref;
 
+	unsigned short ioprio;
+
+#ifdef CONFIG_BLK_ICQ
 	/* all the fields below are protected by this lock */
 	spinlock_t lock;
 
-	unsigned short ioprio;
-
 	struct radix_tree_root	icq_tree;
 	struct io_cq __rcu	*icq_hint;
 	struct hlist_head	icq_list;
 
 	struct work_struct release_work;
+#endif /* CONFIG_BLK_ICQ */
 };
 
 struct task_struct;
-- 
cgit v1.2.3


From 6d24d9546d6e6c524505e51bb5f76d9a83fac478 Mon Sep 17 00:00:00 2001
From: Marijn Suijten <marijn.suijten@somainline.org>
Date: Wed, 8 Dec 2021 10:10:35 +0100
Subject: dt-bindings: clk: qcom: Document MSM8976 Global Clock Controller

Document the required properties and firmware clocks for gcc-msm8976 to
operate nominally, and add header definitions for referencing the clocks
from firmware.

Signed-off-by: Marijn Suijten <marijn.suijten@somainline.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@somainline.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20211208091036.132334-2-marijn.suijten@somainline.org
---
 include/dt-bindings/clock/qcom,gcc-msm8976.h | 240 +++++++++++++++++++++++++++
 1 file changed, 240 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,gcc-msm8976.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,gcc-msm8976.h b/include/dt-bindings/clock/qcom,gcc-msm8976.h
new file mode 100644
index 000000000000..51955fd49426
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,gcc-msm8976.h
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (C) 2016, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2016-2021, AngeloGioacchino Del Regno
+ *                     <angelogioacchino.delregno@somainline.org>
+ */
+
+#ifndef _DT_BINDINGS_CLK_MSM_GCC_8976_H
+#define _DT_BINDINGS_CLK_MSM_GCC_8976_H
+
+#define GPLL0					0
+#define GPLL2					1
+#define GPLL3					2
+#define GPLL4					3
+#define GPLL6					4
+#define GPLL0_CLK_SRC				5
+#define GPLL2_CLK_SRC				6
+#define GPLL3_CLK_SRC				7
+#define GPLL4_CLK_SRC				8
+#define GPLL6_CLK_SRC				9
+#define GCC_BLSP1_QUP1_SPI_APPS_CLK		10
+#define GCC_BLSP1_QUP1_I2C_APPS_CLK		11
+#define GCC_BLSP1_QUP2_I2C_APPS_CLK		12
+#define GCC_BLSP1_QUP2_SPI_APPS_CLK		13
+#define GCC_BLSP1_QUP3_I2C_APPS_CLK		14
+#define GCC_BLSP1_QUP3_SPI_APPS_CLK		15
+#define GCC_BLSP1_QUP4_I2C_APPS_CLK		16
+#define GCC_BLSP1_QUP4_SPI_APPS_CLK		17
+#define GCC_BLSP1_UART1_APPS_CLK		18
+#define GCC_BLSP1_UART2_APPS_CLK		19
+#define GCC_BLSP2_QUP1_I2C_APPS_CLK		20
+#define GCC_BLSP2_QUP1_SPI_APPS_CLK		21
+#define GCC_BLSP2_QUP2_I2C_APPS_CLK		22
+#define GCC_BLSP2_QUP2_SPI_APPS_CLK		23
+#define GCC_BLSP2_QUP3_I2C_APPS_CLK		24
+#define GCC_BLSP2_QUP3_SPI_APPS_CLK		25
+#define GCC_BLSP2_QUP4_I2C_APPS_CLK		26
+#define GCC_BLSP2_QUP4_SPI_APPS_CLK		27
+#define GCC_BLSP2_UART1_APPS_CLK		28
+#define GCC_BLSP2_UART2_APPS_CLK		29
+#define GCC_CAMSS_CCI_AHB_CLK			30
+#define GCC_CAMSS_CCI_CLK			31
+#define GCC_CAMSS_CPP_AHB_CLK			32
+#define GCC_CAMSS_CPP_AXI_CLK			33
+#define GCC_CAMSS_CPP_CLK			34
+#define GCC_CAMSS_CSI0_AHB_CLK			35
+#define GCC_CAMSS_CSI0_CLK			36
+#define GCC_CAMSS_CSI0PHY_CLK			37
+#define GCC_CAMSS_CSI0PIX_CLK			38
+#define GCC_CAMSS_CSI0RDI_CLK			39
+#define GCC_CAMSS_CSI1_AHB_CLK			40
+#define GCC_CAMSS_CSI1_CLK			41
+#define GCC_CAMSS_CSI1PHY_CLK			42
+#define GCC_CAMSS_CSI1PIX_CLK			43
+#define GCC_CAMSS_CSI1RDI_CLK			44
+#define GCC_CAMSS_CSI2_AHB_CLK			45
+#define GCC_CAMSS_CSI2_CLK			46
+#define GCC_CAMSS_CSI2PHY_CLK			47
+#define GCC_CAMSS_CSI2PIX_CLK			48
+#define GCC_CAMSS_CSI2RDI_CLK			49
+#define GCC_CAMSS_CSI_VFE0_CLK			50
+#define GCC_CAMSS_CSI_VFE1_CLK			51
+#define GCC_CAMSS_GP0_CLK			52
+#define GCC_CAMSS_GP1_CLK			53
+#define GCC_CAMSS_ISPIF_AHB_CLK			54
+#define GCC_CAMSS_JPEG0_CLK			55
+#define GCC_CAMSS_JPEG_AHB_CLK			56
+#define GCC_CAMSS_JPEG_AXI_CLK			57
+#define GCC_CAMSS_MCLK0_CLK			58
+#define GCC_CAMSS_MCLK1_CLK			59
+#define GCC_CAMSS_MCLK2_CLK			60
+#define GCC_CAMSS_MICRO_AHB_CLK			61
+#define GCC_CAMSS_CSI0PHYTIMER_CLK		62
+#define GCC_CAMSS_CSI1PHYTIMER_CLK		63
+#define GCC_CAMSS_AHB_CLK			64
+#define GCC_CAMSS_TOP_AHB_CLK			65
+#define GCC_CAMSS_VFE0_CLK			66
+#define GCC_CAMSS_VFE_AHB_CLK			67
+#define GCC_CAMSS_VFE_AXI_CLK			68
+#define GCC_CAMSS_VFE1_AHB_CLK			69
+#define GCC_CAMSS_VFE1_AXI_CLK			70
+#define GCC_CAMSS_VFE1_CLK			71
+#define GCC_DCC_CLK				72
+#define GCC_GP1_CLK				73
+#define GCC_GP2_CLK				74
+#define GCC_GP3_CLK				75
+#define GCC_MDSS_AHB_CLK			76
+#define GCC_MDSS_AXI_CLK			77
+#define GCC_MDSS_ESC0_CLK			78
+#define GCC_MDSS_ESC1_CLK			79
+#define GCC_MDSS_MDP_CLK			80
+#define GCC_MDSS_VSYNC_CLK			81
+#define GCC_MSS_CFG_AHB_CLK			82
+#define GCC_MSS_Q6_BIMC_AXI_CLK			83
+#define GCC_PDM2_CLK				84
+#define GCC_PRNG_AHB_CLK			85
+#define GCC_PDM_AHB_CLK				86
+#define GCC_RBCPR_GFX_AHB_CLK			87
+#define GCC_RBCPR_GFX_CLK			88
+#define GCC_SDCC1_AHB_CLK			89
+#define GCC_SDCC1_APPS_CLK			90
+#define GCC_SDCC1_ICE_CORE_CLK			91
+#define GCC_SDCC2_AHB_CLK			92
+#define GCC_SDCC2_APPS_CLK			93
+#define GCC_SDCC3_AHB_CLK			94
+#define GCC_SDCC3_APPS_CLK			95
+#define GCC_USB2A_PHY_SLEEP_CLK			96
+#define GCC_USB_HS_PHY_CFG_AHB_CLK		97
+#define GCC_USB_FS_AHB_CLK			98
+#define GCC_USB_FS_IC_CLK			99
+#define GCC_USB_FS_SYSTEM_CLK			100
+#define GCC_USB_HS_AHB_CLK			101
+#define GCC_USB_HS_SYSTEM_CLK			102
+#define GCC_VENUS0_AHB_CLK			103
+#define GCC_VENUS0_AXI_CLK			104
+#define GCC_VENUS0_CORE0_VCODEC0_CLK		105
+#define GCC_VENUS0_CORE1_VCODEC0_CLK		106
+#define GCC_VENUS0_VCODEC0_CLK			107
+#define GCC_APSS_AHB_CLK			108
+#define GCC_APSS_AXI_CLK			109
+#define GCC_BLSP1_AHB_CLK			110
+#define GCC_BLSP2_AHB_CLK			111
+#define GCC_BOOT_ROM_AHB_CLK			112
+#define GCC_CRYPTO_AHB_CLK			113
+#define GCC_CRYPTO_AXI_CLK			114
+#define GCC_CRYPTO_CLK				115
+#define GCC_CPP_TBU_CLK				116
+#define GCC_APSS_TCU_CLK			117
+#define GCC_JPEG_TBU_CLK			118
+#define GCC_MDP_RT_TBU_CLK			119
+#define GCC_MDP_TBU_CLK				120
+#define GCC_SMMU_CFG_CLK			121
+#define GCC_VENUS_1_TBU_CLK			122
+#define GCC_VENUS_TBU_CLK			123
+#define GCC_VFE1_TBU_CLK			124
+#define GCC_VFE_TBU_CLK				125
+#define GCC_APS_0_CLK				126
+#define GCC_APS_1_CLK				127
+#define APS_0_CLK_SRC				128
+#define APS_1_CLK_SRC				129
+#define APSS_AHB_CLK_SRC			130
+#define BLSP1_QUP1_I2C_APPS_CLK_SRC		131
+#define BLSP1_QUP1_SPI_APPS_CLK_SRC		132
+#define BLSP1_QUP2_I2C_APPS_CLK_SRC		133
+#define BLSP1_QUP2_SPI_APPS_CLK_SRC		134
+#define BLSP1_QUP3_I2C_APPS_CLK_SRC		135
+#define BLSP1_QUP3_SPI_APPS_CLK_SRC		136
+#define BLSP1_QUP4_I2C_APPS_CLK_SRC		137
+#define BLSP1_QUP4_SPI_APPS_CLK_SRC		138
+#define BLSP1_UART1_APPS_CLK_SRC		139
+#define BLSP1_UART2_APPS_CLK_SRC		140
+#define BLSP2_QUP1_I2C_APPS_CLK_SRC		141
+#define BLSP2_QUP1_SPI_APPS_CLK_SRC		142
+#define BLSP2_QUP2_I2C_APPS_CLK_SRC		143
+#define BLSP2_QUP2_SPI_APPS_CLK_SRC		144
+#define BLSP2_QUP3_I2C_APPS_CLK_SRC		145
+#define BLSP2_QUP3_SPI_APPS_CLK_SRC		146
+#define BLSP2_QUP4_I2C_APPS_CLK_SRC		147
+#define BLSP2_QUP4_SPI_APPS_CLK_SRC		148
+#define BLSP2_UART1_APPS_CLK_SRC		149
+#define BLSP2_UART2_APPS_CLK_SRC		150
+#define CCI_CLK_SRC				151
+#define CPP_CLK_SRC				152
+#define CSI0_CLK_SRC				153
+#define CSI1_CLK_SRC				154
+#define CSI2_CLK_SRC				155
+#define CAMSS_GP0_CLK_SRC			156
+#define CAMSS_GP1_CLK_SRC			157
+#define JPEG0_CLK_SRC				158
+#define MCLK0_CLK_SRC				159
+#define MCLK1_CLK_SRC				160
+#define MCLK2_CLK_SRC				161
+#define CSI0PHYTIMER_CLK_SRC			162
+#define CSI1PHYTIMER_CLK_SRC			163
+#define CAMSS_TOP_AHB_CLK_SRC			164
+#define VFE0_CLK_SRC				165
+#define VFE1_CLK_SRC				166
+#define CRYPTO_CLK_SRC				167
+#define GP1_CLK_SRC				168
+#define GP2_CLK_SRC				169
+#define GP3_CLK_SRC				170
+#define ESC0_CLK_SRC				171
+#define ESC1_CLK_SRC				172
+#define MDP_CLK_SRC				173
+#define VSYNC_CLK_SRC				174
+#define PDM2_CLK_SRC				175
+#define RBCPR_GFX_CLK_SRC			176
+#define SDCC1_APPS_CLK_SRC			177
+#define SDCC1_ICE_CORE_CLK_SRC			178
+#define SDCC2_APPS_CLK_SRC			179
+#define SDCC3_APPS_CLK_SRC			180
+#define USB_FS_IC_CLK_SRC			181
+#define USB_FS_SYSTEM_CLK_SRC			182
+#define USB_HS_SYSTEM_CLK_SRC			183
+#define VCODEC0_CLK_SRC				184
+#define GCC_MDSS_BYTE0_CLK_SRC			185
+#define GCC_MDSS_BYTE1_CLK_SRC			186
+#define GCC_MDSS_BYTE0_CLK			187
+#define GCC_MDSS_BYTE1_CLK			188
+#define GCC_MDSS_PCLK0_CLK_SRC			189
+#define GCC_MDSS_PCLK1_CLK_SRC			190
+#define GCC_MDSS_PCLK0_CLK			191
+#define GCC_MDSS_PCLK1_CLK			192
+#define GCC_GFX3D_CLK_SRC			193
+#define GCC_GFX3D_OXILI_CLK			194
+#define GCC_GFX3D_BIMC_CLK			195
+#define GCC_GFX3D_OXILI_AHB_CLK			196
+#define GCC_GFX3D_OXILI_AON_CLK			197
+#define GCC_GFX3D_OXILI_GMEM_CLK		198
+#define GCC_GFX3D_OXILI_TIMER_CLK		199
+#define GCC_GFX3D_TBU0_CLK			200
+#define GCC_GFX3D_TBU1_CLK			201
+#define GCC_GFX3D_TCU_CLK			202
+#define GCC_GFX3D_GTCU_AHB_CLK			203
+
+/* GCC block resets */
+#define RST_CAMSS_MICRO_BCR			0
+#define RST_USB_HS_BCR				1
+#define RST_QUSB2_PHY_BCR			2
+#define RST_USB2_HS_PHY_ONLY_BCR		3
+#define RST_USB_HS_PHY_CFG_AHB_BCR		4
+#define RST_USB_FS_BCR				5
+#define RST_CAMSS_CSI1PIX_BCR			6
+#define RST_CAMSS_CSI_VFE1_BCR			7
+#define RST_CAMSS_VFE1_BCR			8
+#define RST_CAMSS_CPP_BCR			9
+
+/* GDSCs */
+#define VENUS_GDSC				0
+#define VENUS_CORE0_GDSC			1
+#define VENUS_CORE1_GDSC			2
+#define MDSS_GDSC				3
+#define JPEG_GDSC				4
+#define VFE0_GDSC				5
+#define VFE1_GDSC				6
+#define CPP_GDSC				7
+#define OXILI_GX_GDSC				8
+#define OXILI_CX_GDSC				9
+
+#endif /* _DT_BINDINGS_CLK_MSM_GCC_8976_H */
-- 
cgit v1.2.3


From 85f5a74c2b9ba213d4102dc12ccbfdbe26472abb Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 8 Apr 2021 01:33:45 -0400
Subject: block: Add bio_add_folio()

This is a thin wrapper around bio_add_page().  The main advantage here
is the documentation that folios larger than 2GiB are not supported.
It's not currently possible to allocate folios that large, but if it
ever becomes possible, this function will fail gracefully instead of
doing I/O to the wrong bytes.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 include/linux/bio.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index fe6bdfbbef66..a783cac49978 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -409,7 +409,8 @@ extern void bio_uninit(struct bio *);
 extern void bio_reset(struct bio *);
 void bio_chain(struct bio *, struct bio *);
 
-extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
+int bio_add_page(struct bio *, struct page *, unsigned len, unsigned off);
+bool bio_add_folio(struct bio *, struct folio *, size_t len, size_t off);
 extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
 			   unsigned int, unsigned int);
 int bio_add_zone_append_page(struct bio *bio, struct page *page,
-- 
cgit v1.2.3


From 640d1930bef4f87ec8d8d2b05f0f6edc1dfcf662 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 4 Jan 2021 10:58:17 -0500
Subject: block: Add bio_for_each_folio_all()

Allow callers to iterate over each folio instead of each page.  The
bio need not have been constructed using folios originally.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 include/linux/bio.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index a783cac49978..e3c9e8207f12 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -166,7 +166,7 @@ static inline void bio_advance(struct bio *bio, unsigned int nbytes)
  */
 #define bio_for_each_bvec_all(bvl, bio, i)		\
 	for (i = 0, bvl = bio_first_bvec_all(bio);	\
-	     i < (bio)->bi_vcnt; i++, bvl++)		\
+	     i < (bio)->bi_vcnt; i++, bvl++)
 
 #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len)
 
@@ -260,6 +260,57 @@ static inline struct bio_vec *bio_last_bvec_all(struct bio *bio)
 	return &bio->bi_io_vec[bio->bi_vcnt - 1];
 }
 
+/**
+ * struct folio_iter - State for iterating all folios in a bio.
+ * @folio: The current folio we're iterating.  NULL after the last folio.
+ * @offset: The byte offset within the current folio.
+ * @length: The number of bytes in this iteration (will not cross folio
+ *	boundary).
+ */
+struct folio_iter {
+	struct folio *folio;
+	size_t offset;
+	size_t length;
+	/* private: for use by the iterator */
+	size_t _seg_count;
+	int _i;
+};
+
+static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio,
+				   int i)
+{
+	struct bio_vec *bvec = bio_first_bvec_all(bio) + i;
+
+	fi->folio = page_folio(bvec->bv_page);
+	fi->offset = bvec->bv_offset +
+			PAGE_SIZE * (bvec->bv_page - &fi->folio->page);
+	fi->_seg_count = bvec->bv_len;
+	fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count);
+	fi->_i = i;
+}
+
+static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio)
+{
+	fi->_seg_count -= fi->length;
+	if (fi->_seg_count) {
+		fi->folio = folio_next(fi->folio);
+		fi->offset = 0;
+		fi->length = min(folio_size(fi->folio), fi->_seg_count);
+	} else if (fi->_i + 1 < bio->bi_vcnt) {
+		bio_first_folio(fi, bio, fi->_i + 1);
+	} else {
+		fi->folio = NULL;
+	}
+}
+
+/**
+ * bio_for_each_folio_all - Iterate over each folio in a bio.
+ * @fi: struct folio_iter which is updated for each folio.
+ * @bio: struct bio to iterate over.
+ */
+#define bio_for_each_folio_all(fi, bio)				\
+	for (bio_first_folio(&fi, bio, 0); fi.folio; bio_next_folio(&fi, bio))
+
 enum bip_flags {
 	BIP_BLOCK_INTEGRITY	= 1 << 0, /* block layer owns integrity data */
 	BIP_MAPPED_INTEGRITY	= 1 << 1, /* ref tag has been remapped */
-- 
cgit v1.2.3


From 8306a5f56305521d8b307b4ee1f69949fbb49279 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 28 Apr 2021 07:51:36 -0400
Subject: iomap: Add iomap_invalidate_folio

Keep iomap_invalidatepage around as a wrapper for use in address_space
operations.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 include/linux/iomap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 6d1b08d0ae93..29491fb9c5ba 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -225,6 +225,7 @@ void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
 int iomap_is_partially_uptodate(struct page *page, unsigned long from,
 		unsigned long count);
 int iomap_releasepage(struct page *page, gfp_t gfp_mask);
+void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len);
 void iomap_invalidatepage(struct page *page, unsigned int offset,
 		unsigned int len);
 #ifdef CONFIG_MIGRATION
-- 
cgit v1.2.3


From ea6fa4961aab8f90a8aa03575a98b4bda368d4b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= <mat.jonczyk@o2.pl>
Date: Fri, 10 Dec 2021 21:01:26 +0100
Subject: rtc: mc146818-lib: fix RTC presence check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To prevent an infinite loop in mc146818_get_time(),
commit 211e5db19d15 ("rtc: mc146818: Detect and handle broken RTCs")
added a check for RTC availability. Together with a later fix, it
checked if bit 6 in register 0x0d is cleared.

This, however, caused a false negative on a motherboard with an AMD
SB710 southbridge; according to the specification [1], bit 6 of register
0x0d of this chipset is a scratchbit. This caused a regression in Linux
5.11 - the RTC was determined broken by the kernel and not used by
rtc-cmos.c [3]. This problem was also reported in Fedora [4].

As a better alternative, check whether the UIP ("Update-in-progress")
bit is set for longer then 10ms. If that is the case, then apparently
the RTC is either absent (and all register reads return 0xff) or broken.
Also limit the number of loop iterations in mc146818_get_time() to 10 to
prevent an infinite loop there.

The functions mc146818_get_time() and mc146818_does_rtc_work() will be
refactored later in this patch series, in order to fix a separate
problem with reading / setting the RTC alarm time. This is done so to
avoid a confusion about what is being fixed when.

In a previous approach to this problem, I implemented a check whether
the RTC_HOURS register contains a value <= 24. This, however, sometimes
did not work correctly on my Intel Kaby Lake laptop. According to
Intel's documentation [2], "the time and date RAM locations (0-9) are
disconnected from the external bus" during the update cycle so reading
this register without checking the UIP bit is incorrect.

[1] AMD SB700/710/750 Register Reference Guide, page 308,
https://developer.amd.com/wordpress/media/2012/10/43009_sb7xx_rrg_pub_1.00.pdf

[2] 7th Generation Intel ® Processor Family I/O for U/Y Platforms [...] Datasheet
Volume 1 of 2, page 209
Intel's Document Number: 334658-006,
https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/7th-and-8th-gen-core-family-mobile-u-y-processor-lines-i-o-datasheet-vol-1.pdf

[3] Functions in arch/x86/kernel/rtc.c apparently were using it.

[4] https://bugzilla.redhat.com/show_bug.cgi?id=1936688

Fixes: 211e5db19d15 ("rtc: mc146818: Detect and handle broken RTCs")
Fixes: ebb22a059436 ("rtc: mc146818: Dont test for bit 0-5 in Register D")
Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20211210200131.153887-5-mat.jonczyk@o2.pl
---
 include/linux/mc146818rtc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h
index 0661af17a758..69c80c4325bf 100644
--- a/include/linux/mc146818rtc.h
+++ b/include/linux/mc146818rtc.h
@@ -123,6 +123,7 @@ struct cmos_rtc_board_info {
 #define RTC_IO_EXTENT_USED      RTC_IO_EXTENT
 #endif /* ARCH_RTC_LOCATION */
 
+bool mc146818_does_rtc_work(void);
 unsigned int mc146818_get_time(struct rtc_time *time);
 int mc146818_set_time(struct rtc_time *time);
 
-- 
cgit v1.2.3


From ec5895c0f2d87b9bf4185db1915e40fa6fcfc0ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= <mat.jonczyk@o2.pl>
Date: Fri, 10 Dec 2021 21:01:27 +0100
Subject: rtc: mc146818-lib: extract mc146818_avoid_UIP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Function mc146818_get_time() contains an elaborate mechanism of reading
the RTC time while no RTC update is in progress. It turns out that
reading the RTC alarm clock also requires avoiding the RTC update.
Therefore, the mechanism in mc146818_get_time() should be reused - so
extract it into a separate function.

The logic in mc146818_avoid_UIP() is same as in mc146818_get_time()
except that after every

        if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {

there is now "mdelay(1)".

To avoid producing a very unreadable patch, mc146818_get_time() will be
refactored to use mc146818_avoid_UIP() in the next patch.

Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20211210200131.153887-6-mat.jonczyk@o2.pl
---
 include/linux/mc146818rtc.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h
index 69c80c4325bf..67fb0a12becc 100644
--- a/include/linux/mc146818rtc.h
+++ b/include/linux/mc146818rtc.h
@@ -127,4 +127,7 @@ bool mc146818_does_rtc_work(void);
 unsigned int mc146818_get_time(struct rtc_time *time);
 int mc146818_set_time(struct rtc_time *time);
 
+bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param),
+			void *param);
+
 #endif /* _MC146818RTC_H */
-- 
cgit v1.2.3


From 34fff62827b254f8a43633cc878deb04bf11297c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Dec 2021 23:18:54 +0100
Subject: device: Move MSI related data into a struct

The only unconditional part of MSI data in struct device is the irqdomain
pointer. Everything else can be allocated on demand. Create a data
structure and move the irqdomain pointer into it. The other MSI specific
parts are going to be removed from struct device in later steps.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20211210221813.617178827@linutronix.de
---
 include/linux/device.h | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index 2a22875238a6..f212b7a7b156 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -371,6 +371,16 @@ struct dev_links_info {
 	enum dl_dev_state status;
 };
 
+/**
+ * struct dev_msi_info - Device data related to MSI
+ * @domain:	The MSI interrupt domain associated to the device
+ */
+struct dev_msi_info {
+#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+	struct irq_domain	*domain;
+#endif
+};
+
 /**
  * struct device - The basic device structure
  * @parent:	The device's "parent" device, the device to which it is attached.
@@ -407,8 +417,8 @@ struct dev_links_info {
  * @em_pd:	device's energy model performance domain
  * @pins:	For device pin management.
  *		See Documentation/driver-api/pin-control.rst for details.
+ * @msi:	MSI related data
  * @msi_list:	Hosts MSI descriptors
- * @msi_domain: The generic MSI domain this device is using.
  * @numa_node:	NUMA node this device is close to.
  * @dma_ops:    DMA mapping operations for this device.
  * @dma_mask:	Dma mask (if dma'ble device).
@@ -500,12 +510,10 @@ struct device {
 	struct em_perf_domain	*em_pd;
 #endif
 
-#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
-	struct irq_domain	*msi_domain;
-#endif
 #ifdef CONFIG_PINCTRL
 	struct dev_pin_info	*pins;
 #endif
+	struct dev_msi_info	msi;
 #ifdef CONFIG_GENERIC_MSI_IRQ
 	struct list_head	msi_list;
 #endif
@@ -666,7 +674,7 @@ static inline void set_dev_node(struct device *dev, int node)
 static inline struct irq_domain *dev_get_msi_domain(const struct device *dev)
 {
 #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
-	return dev->msi_domain;
+	return dev->msi.domain;
 #else
 	return NULL;
 #endif
@@ -675,7 +683,7 @@ static inline struct irq_domain *dev_get_msi_domain(const struct device *dev)
 static inline void dev_set_msi_domain(struct device *dev, struct irq_domain *d)
 {
 #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
-	dev->msi_domain = d;
+	dev->msi.domain = d;
 #endif
 }
 
-- 
cgit v1.2.3


From 013bd8e543c2c777b586cf033c588ea82bd502db Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Dec 2021 23:18:55 +0100
Subject: device: Add device:: Msi_data pointer and struct msi_device_data

Create struct msi_device_data and add a pointer of that type to struct
dev_msi_info, which is part of struct device. Provide an allocator function
which can be invoked from the MSI interrupt allocation code pathes.

Add a properties field to the data structure as a first member so the
allocation size is not zero bytes. The field will be uses later on.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211210221813.676660809@linutronix.de
---
 include/linux/device.h |  5 +++++
 include/linux/msi.h    | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index f212b7a7b156..f0033cd93631 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -45,6 +45,7 @@ struct iommu_ops;
 struct iommu_group;
 struct dev_pin_info;
 struct dev_iommu;
+struct msi_device_data;
 
 /**
  * struct subsys_interface - interfaces to device functions
@@ -374,11 +375,15 @@ struct dev_links_info {
 /**
  * struct dev_msi_info - Device data related to MSI
  * @domain:	The MSI interrupt domain associated to the device
+ * @data:	Pointer to MSI device data
  */
 struct dev_msi_info {
 #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
 	struct irq_domain	*domain;
 #endif
+#ifdef CONFIG_GENERIC_MSI_IRQ
+	struct msi_device_data	*data;
+#endif
 };
 
 /**
diff --git a/include/linux/msi.h b/include/linux/msi.h
index ba4a39c430b5..7e4c8fd7c65d 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -171,6 +171,16 @@ struct msi_desc {
 	};
 };
 
+/**
+ * msi_device_data - MSI per device data
+ * @properties:		MSI properties which are interesting to drivers
+ */
+struct msi_device_data {
+	unsigned long			properties;
+};
+
+int msi_setup_device_data(struct device *dev);
+
 /* Helpers to hide struct msi_desc implementation details */
 #define msi_desc_to_dev(desc)		((desc)->dev)
 #define dev_to_msi_list(dev)		(&(dev)->msi_list)
@@ -233,10 +243,16 @@ void pci_msi_mask_irq(struct irq_data *data);
 void pci_msi_unmask_irq(struct irq_data *data);
 
 #ifdef CONFIG_SYSFS
+int msi_device_populate_sysfs(struct device *dev);
+void msi_device_destroy_sysfs(struct device *dev);
+
 const struct attribute_group **msi_populate_sysfs(struct device *dev);
 void msi_destroy_sysfs(struct device *dev,
 		       const struct attribute_group **msi_irq_groups);
 #else
+static inline int msi_device_populate_sysfs(struct device *dev) { return 0; }
+static inline void msi_device_destroy_sysfs(struct device *dev) { }
+
 static inline const struct attribute_group **msi_populate_sysfs(struct device *dev)
 {
 	return NULL;
@@ -384,6 +400,8 @@ enum {
 	MSI_FLAG_MUST_REACTIVATE	= (1 << 5),
 	/* Is level-triggered capable, using two messages */
 	MSI_FLAG_LEVEL_CAPABLE		= (1 << 6),
+	/* Populate sysfs on alloc() and destroy it on free() */
+	MSI_FLAG_DEV_SYSFS		= (1 << 7),
 };
 
 int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask,
-- 
cgit v1.2.3


From 3f35d2cf9fbc656db82579d849cc69c373b1ad0d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 15 Dec 2021 18:16:44 +0100
Subject: PCI/MSI: Decouple MSI[-X] disable from pcim_release()

The MSI core will introduce runtime allocation of MSI related data. This
data will be devres managed and has to be set up before enabling
PCI/MSI[-X]. This would introduce an ordering issue vs. pcim_release().

The setup order is:

   pcim_enable_device()
	devres_alloc(pcim_release...);
	...
	pci_irq_alloc()
	  msi_setup_device_data()
	     devres_alloc(msi_device_data_release, ...)

and once the device is released these release functions are invoked in the
opposite order:

    msi_device_data_release()
    ...
    pcim_release()
       pci_disable_msi[x]()

which is obviously wrong, because pci_disable_msi[x]() requires the MSI
data to be available to tear down the MSI[-X] interrupts.

Remove the MSI[-X] teardown from pcim_release() and add an explicit action
to be installed on the attempt of enabling PCI/MSI[-X].

This allows the MSI core data allocation to be ordered correctly in a
subsequent step.

Reported-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/87tuf9rdoj.ffs@tglx
---
 include/linux/pci.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 5cc46baef519..a09736d3e05e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -425,7 +425,8 @@ struct pci_dev {
 	unsigned int	ats_enabled:1;		/* Address Translation Svc */
 	unsigned int	pasid_enabled:1;	/* Process Address Space ID */
 	unsigned int	pri_enabled:1;		/* Page Request Interface */
-	unsigned int	is_managed:1;
+	unsigned int	is_managed:1;		/* Managed via devres */
+	unsigned int	is_msi_managed:1;	/* MSI release via devres installed */
 	unsigned int	needs_freset:1;		/* Requires fundamental reset */
 	unsigned int	state_saved:1;
 	unsigned int	is_physfn:1;
-- 
cgit v1.2.3


From bf6e054e0e3fbc9614355b760e18c8a14f952a4e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Dec 2021 23:19:03 +0100
Subject: genirq/msi: Provide msi_device_populate/destroy_sysfs()

Add new allocation functions which can be activated by domain info
flags. They store the groups pointer in struct msi_device_data.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211210221813.988659194@linutronix.de
---
 include/linux/msi.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 7e4c8fd7c65d..1b96dc483b88 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -56,6 +56,8 @@ struct irq_data;
 struct msi_desc;
 struct pci_dev;
 struct platform_msi_priv_data;
+struct attribute_group;
+
 void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
 #ifdef CONFIG_GENERIC_MSI_IRQ
 void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg);
@@ -174,9 +176,11 @@ struct msi_desc {
 /**
  * msi_device_data - MSI per device data
  * @properties:		MSI properties which are interesting to drivers
+ * @attrs:		Pointer to the sysfs attribute group
  */
 struct msi_device_data {
 	unsigned long			properties;
+	const struct attribute_group    **attrs;
 };
 
 int msi_setup_device_data(struct device *dev);
-- 
cgit v1.2.3


From ffd84485e6beb9cad3e5a133d88201b995298c33 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Dec 2021 23:19:05 +0100
Subject: PCI/MSI: Let the irq code handle sysfs groups

Set the domain info flag which makes the core code handle sysfs groups and
put an explicit invocation into the legacy code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/20211210221814.048612053@linutronix.de
---
 include/linux/pci.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index a09736d3e05e..0a7b6b2f163b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -476,7 +476,6 @@ struct pci_dev {
 #ifdef CONFIG_PCI_MSI
 	void __iomem	*msix_base;
 	raw_spinlock_t	msi_lock;
-	const struct attribute_group **msi_irq_groups;
 #endif
 	struct pci_vpd	vpd;
 #ifdef CONFIG_PCIE_DPC
-- 
cgit v1.2.3


From 24cff375fdb663c2238f06693a067b9219596fdc Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Dec 2021 23:19:08 +0100
Subject: genirq/msi: Remove the original sysfs interfaces

No more users. Refactor the core code accordingly and move the global
interface under CONFIG_PCI_MSI_ARCH_FALLBACKS.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211210221814.168362229@linutronix.de
---
 include/linux/msi.h | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 1b96dc483b88..634a12962e72 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -249,22 +249,10 @@ void pci_msi_unmask_irq(struct irq_data *data);
 #ifdef CONFIG_SYSFS
 int msi_device_populate_sysfs(struct device *dev);
 void msi_device_destroy_sysfs(struct device *dev);
-
-const struct attribute_group **msi_populate_sysfs(struct device *dev);
-void msi_destroy_sysfs(struct device *dev,
-		       const struct attribute_group **msi_irq_groups);
-#else
+#else /* CONFIG_SYSFS */
 static inline int msi_device_populate_sysfs(struct device *dev) { return 0; }
 static inline void msi_device_destroy_sysfs(struct device *dev) { }
-
-static inline const struct attribute_group **msi_populate_sysfs(struct device *dev)
-{
-	return NULL;
-}
-static inline void msi_destroy_sysfs(struct device *dev, const struct attribute_group **msi_irq_groups)
-{
-}
-#endif
+#endif /* !CONFIG_SYSFS */
 
 /*
  * The arch hooks to setup up msi irqs. Default functions are implemented
@@ -279,7 +267,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc);
 void arch_teardown_msi_irq(unsigned int irq);
 int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
 void arch_teardown_msi_irqs(struct pci_dev *dev);
-#endif
+#endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */
 
 /*
  * The restore hook is still available even for fully irq domain based
-- 
cgit v1.2.3


From 9835cec6d557b0bff3d48bd91cd0484aba59386c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Dec 2021 23:19:09 +0100
Subject: platform-msi: Rename functions and clarify comments

It's hard to distinguish what platform_msi_domain_alloc() and
platform_msi_domain_alloc_irqs() are about. Make the distinction more
explicit and add comments which explain the use cases properly.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211210221814.228706214@linutronix.de
---
 include/linux/msi.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 634a12962e72..12dd28629c43 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -435,10 +435,10 @@ __platform_msi_create_device_domain(struct device *dev,
 #define platform_msi_create_device_tree_domain(dev, nvec, write, ops, data) \
 	__platform_msi_create_device_domain(dev, nvec, true, write, ops, data)
 
-int platform_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
-			      unsigned int nr_irqs);
-void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq,
-			      unsigned int nvec);
+int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				     unsigned int nr_irqs);
+void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq,
+				     unsigned int nvec);
 void *platform_msi_get_host_data(struct irq_domain *domain);
 #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */
 
-- 
cgit v1.2.3


From fc22e7dbcdb3e06a3d3ce05fc91c6a2345411f9b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Dec 2021 23:19:11 +0100
Subject: platform-msi: Store platform private data pointer in msi_device_data

Storing the platform private data in a MSI descriptor is sloppy at
best. The data belongs to the device and not to the descriptor.
Add a pointer to struct msi_device_data and store the pointer there.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211210221814.287680528@linutronix.de
---
 include/linux/msi.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 12dd28629c43..cdf0d09c3ad4 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -108,11 +108,9 @@ struct pci_msi_desc {
 
 /**
  * platform_msi_desc - Platform device specific msi descriptor data
- * @msi_priv_data:	Pointer to platform private data
  * @msi_index:		The index of the MSI descriptor for multi MSI
  */
 struct platform_msi_desc {
-	struct platform_msi_priv_data	*msi_priv_data;
 	u16				msi_index;
 };
 
@@ -177,10 +175,12 @@ struct msi_desc {
  * msi_device_data - MSI per device data
  * @properties:		MSI properties which are interesting to drivers
  * @attrs:		Pointer to the sysfs attribute group
+ * @platform_data:	Platform-MSI specific data
  */
 struct msi_device_data {
 	unsigned long			properties;
 	const struct attribute_group    **attrs;
+	struct platform_msi_priv_data	*platform_data;
 };
 
 int msi_setup_device_data(struct device *dev);
-- 
cgit v1.2.3


From 20c6d424cfe641659c9a025db8a8608701b27246 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Dec 2021 23:19:12 +0100
Subject: genirq/msi: Consolidate MSI descriptor data

All non PCI/MSI usage variants have data structures in struct msi_desc with
only one member: xxx_index. PCI/MSI has a entry_nr member.

Add a common msi_index member to struct msi_desc so all implementations can
share it which allows further consolidation.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211210221814.350967317@linutronix.de
---
 include/linux/msi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index cdf0d09c3ad4..ee8fe49dedd1 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -143,6 +143,7 @@ struct ti_sci_inta_msi_desc {
  *			address or data changes
  * @write_msi_msg_data:	Data parameter for the callback.
  *
+ * @msi_index:	Index of the msi descriptor
  * @pci:	[PCI]	    PCI speficic msi descriptor data
  * @platform:	[platform]  Platform device specific msi descriptor data
  * @fsl_mc:	[fsl-mc]    FSL MC device specific msi descriptor data
@@ -163,6 +164,7 @@ struct msi_desc {
 	void (*write_msi_msg)(struct msi_desc *entry, void *data);
 	void *write_msi_msg_data;
 
+	u16				msi_index;
 	union {
 		struct pci_msi_desc		pci;
 		struct platform_msi_desc	platform;
-- 
cgit v1.2.3


From dba27c7fa36f468e7eb29b216879f8c33bf0955d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Dec 2021 23:19:14 +0100
Subject: platform-msi: Use msi_desc::msi_index

Use the common msi_index member and get rid of the pointless wrapper struct.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211210221814.413638645@linutronix.de
---
 include/linux/msi.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index ee8fe49dedd1..1d85e954e130 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -106,14 +106,6 @@ struct pci_msi_desc {
 	};
 };
 
-/**
- * platform_msi_desc - Platform device specific msi descriptor data
- * @msi_index:		The index of the MSI descriptor for multi MSI
- */
-struct platform_msi_desc {
-	u16				msi_index;
-};
-
 /**
  * fsl_mc_msi_desc - FSL-MC device specific msi descriptor data
  * @msi_index:		The index of the MSI descriptor
@@ -145,7 +137,6 @@ struct ti_sci_inta_msi_desc {
  *
  * @msi_index:	Index of the msi descriptor
  * @pci:	[PCI]	    PCI speficic msi descriptor data
- * @platform:	[platform]  Platform device specific msi descriptor data
  * @fsl_mc:	[fsl-mc]    FSL MC device specific msi descriptor data
  * @inta:	[INTA]	    TISCI based INTA specific msi descriptor data
  */
@@ -167,7 +158,6 @@ struct msi_desc {
 	u16				msi_index;
 	union {
 		struct pci_msi_desc		pci;
-		struct platform_msi_desc	platform;
 		struct fsl_mc_msi_desc		fsl_mc;
 		struct ti_sci_inta_msi_desc	inta;
 	};
-- 
cgit v1.2.3


From 78ee9fb4b8b126ed84a819a6e1732fd3039b525a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Dec 2021 23:19:15 +0100
Subject: bus: fsl-mc-msi: Use msi_desc::msi_index

Use the common msi_index member and get rid of the pointless wrapper struct.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211210221814.477386185@linutronix.de
---
 include/linux/msi.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 1d85e954e130..25edf83ede41 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -106,14 +106,6 @@ struct pci_msi_desc {
 	};
 };
 
-/**
- * fsl_mc_msi_desc - FSL-MC device specific msi descriptor data
- * @msi_index:		The index of the MSI descriptor
- */
-struct fsl_mc_msi_desc {
-	u16				msi_index;
-};
-
 /**
  * ti_sci_inta_msi_desc - TISCI based INTA specific msi descriptor data
  * @dev_index: TISCI device index
@@ -137,7 +129,6 @@ struct ti_sci_inta_msi_desc {
  *
  * @msi_index:	Index of the msi descriptor
  * @pci:	[PCI]	    PCI speficic msi descriptor data
- * @fsl_mc:	[fsl-mc]    FSL MC device specific msi descriptor data
  * @inta:	[INTA]	    TISCI based INTA specific msi descriptor data
  */
 struct msi_desc {
@@ -158,7 +149,6 @@ struct msi_desc {
 	u16				msi_index;
 	union {
 		struct pci_msi_desc		pci;
-		struct fsl_mc_msi_desc		fsl_mc;
 		struct ti_sci_inta_msi_desc	inta;
 	};
 };
-- 
cgit v1.2.3


From 0f18095871fc59c89a281caf6f18538cf9e50fbf Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Dec 2021 23:19:17 +0100
Subject: soc: ti: ti_sci_inta_msi: Use msi_desc::msi_index

Use the common msi_index member and get rid of the pointless wrapper struct.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Nishanth Menon <nm@ti.com>
Link: https://lore.kernel.org/r/20211210221814.540704224@linutronix.de
---
 include/linux/msi.h | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 25edf83ede41..45ec5d07a5f3 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -106,14 +106,6 @@ struct pci_msi_desc {
 	};
 };
 
-/**
- * ti_sci_inta_msi_desc - TISCI based INTA specific msi descriptor data
- * @dev_index: TISCI device index
- */
-struct ti_sci_inta_msi_desc {
-	u16	dev_index;
-};
-
 /**
  * struct msi_desc - Descriptor structure for MSI based interrupts
  * @list:	List head for management
@@ -128,8 +120,7 @@ struct ti_sci_inta_msi_desc {
  * @write_msi_msg_data:	Data parameter for the callback.
  *
  * @msi_index:	Index of the msi descriptor
- * @pci:	[PCI]	    PCI speficic msi descriptor data
- * @inta:	[INTA]	    TISCI based INTA specific msi descriptor data
+ * @pci:	PCI specific msi descriptor data
  */
 struct msi_desc {
 	/* Shared device/bus type independent data */
@@ -147,10 +138,7 @@ struct msi_desc {
 	void *write_msi_msg_data;
 
 	u16				msi_index;
-	union {
-		struct pci_msi_desc		pci;
-		struct ti_sci_inta_msi_desc	inta;
-	};
+	struct pci_msi_desc		pci;
 };
 
 /**
-- 
cgit v1.2.3


From 173ffad79d177d9a91fbf3be6bf67ca81e0f765a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Dec 2021 23:19:18 +0100
Subject: PCI/MSI: Use msi_desc::msi_index

The usage of msi_desc::pci::entry_nr is confusing at best. It's the index
into the MSI[X] descriptor table.

Use msi_desc::msi_index which is shared between all MSI incarnations
instead of having a PCI specific storage for no value.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/20211210221814.602911509@linutronix.de
---
 include/linux/msi.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 45ec5d07a5f3..b3d3b0bf59fe 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -80,7 +80,6 @@ typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc,
  * @multi_cap:	[PCI MSI/X] log2 num of messages supported
  * @can_mask:	[PCI MSI/X] Masking supported?
  * @is_64:	[PCI MSI/X] Address size: 0=32bit 1=64bit
- * @entry_nr:	[PCI MSI/X] Entry which is described by this descriptor
  * @default_irq:[PCI MSI/X] The default pre-assigned non-MSI irq
  * @mask_pos:	[PCI MSI]   Mask register position
  * @mask_base:	[PCI MSI-X] Mask register base address
@@ -97,7 +96,6 @@ struct pci_msi_desc {
 		u8	can_mask	: 1;
 		u8	is_64		: 1;
 		u8	is_virtual	: 1;
-		u16	entry_nr;
 		unsigned default_irq;
 	} msi_attrib;
 	union {
-- 
cgit v1.2.3


From 7a823443e9b4ed1ff4a3026d184f09d23fd6d9c9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Dec 2021 23:19:20 +0100
Subject: PCI/MSI: Provide MSI_FLAG_MSIX_CONTIGUOUS

Provide a domain info flag which makes the core code check for a contiguous
MSI-X index on allocation. That's simpler than checking it at some other
domain callback in architecture code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/20211210221814.662401116@linutronix.de
---
 include/linux/msi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index b3d3b0bf59fe..d206239e6fa8 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -362,6 +362,8 @@ enum {
 	MSI_FLAG_LEVEL_CAPABLE		= (1 << 6),
 	/* Populate sysfs on alloc() and destroy it on free() */
 	MSI_FLAG_DEV_SYSFS		= (1 << 7),
+	/* MSI-X entries must be contiguous */
+	MSI_FLAG_MSIX_CONTIGUOUS	= (1 << 8),
 };
 
 int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask,
-- 
cgit v1.2.3


From cf15f43acaad31dabb2646cef170a506a1d663eb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Dec 2021 23:19:23 +0100
Subject: genirq/msi: Provide interface to retrieve Linux interrupt number

This allows drivers to retrieve the Linux interrupt number instead of
fiddling with MSI descriptors.

msi_get_virq() returns the Linux interrupt number or 0 in case that there
is no entry for the given MSI index.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211210221814.780824745@linutronix.de
---
 include/linux/msi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index d206239e6fa8..7593fc383dba 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -153,6 +153,8 @@ struct msi_device_data {
 
 int msi_setup_device_data(struct device *dev);
 
+unsigned int msi_get_virq(struct device *dev, unsigned int index);
+
 /* Helpers to hide struct msi_desc implementation details */
 #define msi_desc_to_dev(desc)		((desc)->dev)
 #define dev_to_msi_list(dev)		(&(dev)->msi_list)
-- 
cgit v1.2.3


From d86a6d47bcc6b41fe2a4e13313d66a772d00382f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Dec 2021 23:19:34 +0100
Subject: bus: fsl-mc: fsl-mc-allocator: Rework MSI handling

Storing a pointer to the MSI descriptor just to track the Linux interrupt
number is daft. Just store the interrupt number and be done with it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211210221815.207838579@linutronix.de
---
 include/linux/fsl/mc.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h
index e026f6c48b49..7b6c42bfb660 100644
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -91,13 +91,13 @@ struct fsl_mc_resource {
 
 /**
  * struct fsl_mc_device_irq - MC object device message-based interrupt
- * @msi_desc: pointer to MSI descriptor allocated by fsl_mc_msi_alloc_descs()
+ * @virq: Linux virtual interrupt number
  * @mc_dev: MC object device that owns this interrupt
  * @dev_irq_index: device-relative IRQ index
  * @resource: MC generic resource associated with the interrupt
  */
 struct fsl_mc_device_irq {
-	struct msi_desc *msi_desc;
+	unsigned int virq;
 	struct fsl_mc_device *mc_dev;
 	u8 dev_irq_index;
 	struct fsl_mc_resource resource;
-- 
cgit v1.2.3


From 89e0032ec201f76c86d6e3e6f94574dfb8e39b71 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Dec 2021 23:19:35 +0100
Subject: soc: ti: ti_sci_inta_msi: Get rid of ti_sci_inta_msi_get_virq()

Just use the core function msi_get_virq().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Vinod Koul <vkoul@kernel.org>
Acked-by: Nishanth Menon <nm@ti.com>
Link: https://lore.kernel.org/r/20211210221815.269468319@linutronix.de
---
 include/linux/soc/ti/ti_sci_inta_msi.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/soc/ti/ti_sci_inta_msi.h b/include/linux/soc/ti/ti_sci_inta_msi.h
index e3aa8b14612e..25ea78a8ea5c 100644
--- a/include/linux/soc/ti/ti_sci_inta_msi.h
+++ b/include/linux/soc/ti/ti_sci_inta_msi.h
@@ -18,6 +18,5 @@ struct irq_domain
 				   struct irq_domain *parent);
 int ti_sci_inta_msi_domain_alloc_irqs(struct device *dev,
 				      struct ti_sci_resource *res);
-unsigned int ti_sci_inta_msi_get_virq(struct device *dev, u32 index);
 void ti_sci_inta_msi_domain_free_irqs(struct device *dev);
 #endif /* __INCLUDE_LINUX_IRQCHIP_TI_SCI_INTA_H */
-- 
cgit v1.2.3


From 125282cd4f33ecd53a24ae4807409da0e5e90fd4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:51:04 +0100
Subject: genirq/msi: Move descriptor list to struct msi_device_data

It's only required when MSI is in use.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211206210747.650487479@linutronix.de
---
 include/linux/device.h | 4 ----
 include/linux/msi.h    | 4 +++-
 2 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index f0033cd93631..93459724dcde 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -423,7 +423,6 @@ struct dev_msi_info {
  * @pins:	For device pin management.
  *		See Documentation/driver-api/pin-control.rst for details.
  * @msi:	MSI related data
- * @msi_list:	Hosts MSI descriptors
  * @numa_node:	NUMA node this device is close to.
  * @dma_ops:    DMA mapping operations for this device.
  * @dma_mask:	Dma mask (if dma'ble device).
@@ -519,9 +518,6 @@ struct device {
 	struct dev_pin_info	*pins;
 #endif
 	struct dev_msi_info	msi;
-#ifdef CONFIG_GENERIC_MSI_IRQ
-	struct list_head	msi_list;
-#endif
 #ifdef CONFIG_DMA_OPS
 	const struct dma_map_ops *dma_ops;
 #endif
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 7593fc383dba..4223e47103ed 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -144,11 +144,13 @@ struct msi_desc {
  * @properties:		MSI properties which are interesting to drivers
  * @attrs:		Pointer to the sysfs attribute group
  * @platform_data:	Platform-MSI specific data
+ * @list:		List of MSI descriptors associated to the device
  */
 struct msi_device_data {
 	unsigned long			properties;
 	const struct attribute_group    **attrs;
 	struct platform_msi_priv_data	*platform_data;
+	struct list_head		list;
 };
 
 int msi_setup_device_data(struct device *dev);
@@ -157,7 +159,7 @@ unsigned int msi_get_virq(struct device *dev, unsigned int index);
 
 /* Helpers to hide struct msi_desc implementation details */
 #define msi_desc_to_dev(desc)		((desc)->dev)
-#define dev_to_msi_list(dev)		(&(dev)->msi_list)
+#define dev_to_msi_list(dev)		(&(dev)->msi.data->list)
 #define first_msi_entry(dev)		\
 	list_first_entry(dev_to_msi_list((dev)), struct msi_desc, list)
 #define for_each_msi_entry(desc, dev)	\
-- 
cgit v1.2.3


From b5f687f97d1e112493fe0447a1fb09fbd93c334b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:51:05 +0100
Subject: genirq/msi: Add mutex for MSI list protection

For upcoming runtime extensions of MSI-X interrupts it's required to
protect the MSI descriptor list. Add a mutex to struct msi_device_data and
provide lock/unlock functions.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211206210747.708877269@linutronix.de
---
 include/linux/msi.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 4223e47103ed..2cf6c530588d 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -3,6 +3,7 @@
 #define LINUX_MSI_H
 
 #include <linux/cpumask.h>
+#include <linux/mutex.h>
 #include <linux/list.h>
 #include <asm/msi.h>
 
@@ -145,17 +146,21 @@ struct msi_desc {
  * @attrs:		Pointer to the sysfs attribute group
  * @platform_data:	Platform-MSI specific data
  * @list:		List of MSI descriptors associated to the device
+ * @mutex:		Mutex protecting the MSI list
  */
 struct msi_device_data {
 	unsigned long			properties;
 	const struct attribute_group    **attrs;
 	struct platform_msi_priv_data	*platform_data;
 	struct list_head		list;
+	struct mutex			mutex;
 };
 
 int msi_setup_device_data(struct device *dev);
 
 unsigned int msi_get_virq(struct device *dev, unsigned int index);
+void msi_lock_descs(struct device *dev);
+void msi_unlock_descs(struct device *dev);
 
 /* Helpers to hide struct msi_desc implementation details */
 #define msi_desc_to_dev(desc)		((desc)->dev)
-- 
cgit v1.2.3


From 0f62d941acf9ac3b6025692ce649b1f282b89e7f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:51:07 +0100
Subject: genirq/msi: Provide msi_domain_alloc/free_irqs_descs_locked()

Usage sites which do allocations of the MSI descriptors before invoking
msi_domain_alloc_irqs() require to lock the MSI decriptors accross the
operation.

Provide entry points which can be called with the MSI mutex held and lock
the mutex in the existing entry points.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211206210747.765371053@linutronix.de
---
 include/linux/msi.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 2cf6c530588d..69c588efe85b 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -383,9 +383,12 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
 					 struct irq_domain *parent);
 int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 			    int nvec);
+int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device *dev,
+				       int nvec);
 int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 			  int nvec);
 void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev);
+void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device *dev);
 void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev);
 struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain);
 
-- 
cgit v1.2.3


From 1046f71d7268b1680d7b044dea83c664403f6302 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:51:08 +0100
Subject: genirq/msi: Provide a set of advanced MSI accessors and iterators

In preparation for dynamic handling of MSI-X interrupts provide a new set
of MSI descriptor accessor functions and iterators. They are benefitial per
se as they allow to cleanup quite some code in various MSI domain
implementations.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211206210747.818635078@linutronix.de
---
 include/linux/msi.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 69c588efe85b..703221f7e9ea 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -140,6 +140,18 @@ struct msi_desc {
 	struct pci_msi_desc		pci;
 };
 
+/*
+ * Filter values for the MSI descriptor iterators and accessor functions.
+ */
+enum msi_desc_filter {
+	/* All descriptors */
+	MSI_DESC_ALL,
+	/* Descriptors which have no interrupt associated */
+	MSI_DESC_NOTASSOCIATED,
+	/* Descriptors which have an interrupt associated */
+	MSI_DESC_ASSOCIATED,
+};
+
 /**
  * msi_device_data - MSI per device data
  * @properties:		MSI properties which are interesting to drivers
@@ -147,6 +159,7 @@ struct msi_desc {
  * @platform_data:	Platform-MSI specific data
  * @list:		List of MSI descriptors associated to the device
  * @mutex:		Mutex protecting the MSI list
+ * @__next:		Cached pointer to the next entry for iterators
  */
 struct msi_device_data {
 	unsigned long			properties;
@@ -154,6 +167,7 @@ struct msi_device_data {
 	struct platform_msi_priv_data	*platform_data;
 	struct list_head		list;
 	struct mutex			mutex;
+	struct msi_desc			*__next;
 };
 
 int msi_setup_device_data(struct device *dev);
@@ -162,6 +176,25 @@ unsigned int msi_get_virq(struct device *dev, unsigned int index);
 void msi_lock_descs(struct device *dev);
 void msi_unlock_descs(struct device *dev);
 
+struct msi_desc *msi_first_desc(struct device *dev, enum msi_desc_filter filter);
+struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter);
+
+/**
+ * msi_for_each_desc - Iterate the MSI descriptors
+ *
+ * @desc:	struct msi_desc pointer used as iterator
+ * @dev:	struct device pointer - device to iterate
+ * @filter:	Filter for descriptor selection
+ *
+ * Notes:
+ *  - The loop must be protected with a msi_lock_descs()/msi_unlock_descs()
+ *    pair.
+ *  - It is safe to remove a retrieved MSI descriptor in the loop.
+ */
+#define msi_for_each_desc(desc, dev, filter)			\
+	for ((desc) = msi_first_desc((dev), (filter)); (desc);	\
+	     (desc) = msi_next_desc((dev), (filter)))
+
 /* Helpers to hide struct msi_desc implementation details */
 #define msi_desc_to_dev(desc)		((desc)->dev)
 #define dev_to_msi_list(dev)		(&(dev)->msi.data->list)
-- 
cgit v1.2.3


From 602905253607ba892336f7bba8bb45b5be819d87 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:51:10 +0100
Subject: genirq/msi: Provide msi_alloc_msi_desc() and a simple allocator

Provide msi_alloc_msi_desc() which takes a template MSI descriptor for
initializing a newly allocated descriptor. This allows to simplify various
usage sites of alloc_msi_entry() and moves the storage handling into the
core code.

For simple cases where only a linear vector space is required provide
msi_add_simple_msi_descs() which just allocates a linear range of MSI
descriptors and fills msi_desc::msi_index accordingly.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211206210747.873833567@linutronix.de
---
 include/linux/msi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 703221f7e9ea..bbb8c1e2c18b 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -247,6 +247,8 @@ static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
 }
 #endif /* CONFIG_PCI_MSI */
 
+int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc);
+
 struct msi_desc *alloc_msi_entry(struct device *dev, int nvec,
 				 const struct irq_affinity_desc *affinity);
 void free_msi_entry(struct msi_desc *entry);
-- 
cgit v1.2.3


From 645474e2cee450131e8b8d8a69a5d9bbabd43f3f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:51:12 +0100
Subject: genirq/msi: Provide domain flags to allocate/free MSI descriptors
 automatically

Provide domain info flags which tell the core to allocate simple
descriptors or to free descriptors when the interrupts are freed and
implement the required functionality.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211206210747.928198636@linutronix.de
---
 include/linux/msi.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index bbb8c1e2c18b..17e47ab8d57a 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -105,6 +105,8 @@ struct pci_msi_desc {
 	};
 };
 
+#define MSI_MAX_INDEX		((unsigned int)USHRT_MAX)
+
 /**
  * struct msi_desc - Descriptor structure for MSI based interrupts
  * @list:	List head for management
@@ -248,6 +250,17 @@ static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
 #endif /* CONFIG_PCI_MSI */
 
 int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc);
+void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter,
+			      unsigned int first_index, unsigned int last_index);
+
+/**
+ * msi_free_msi_descs - Free MSI descriptors of a device
+ * @dev:	Device to free the descriptors
+ */
+static inline void msi_free_msi_descs(struct device *dev)
+{
+	msi_free_msi_descs_range(dev, MSI_DESC_ALL, 0, MSI_MAX_INDEX);
+}
 
 struct msi_desc *alloc_msi_entry(struct device *dev, int nvec,
 				 const struct irq_affinity_desc *affinity);
@@ -408,6 +421,10 @@ enum {
 	MSI_FLAG_DEV_SYSFS		= (1 << 7),
 	/* MSI-X entries must be contiguous */
 	MSI_FLAG_MSIX_CONTIGUOUS	= (1 << 8),
+	/* Allocate simple MSI descriptors */
+	MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS	= (1 << 9),
+	/* Free MSI descriptors */
+	MSI_FLAG_FREE_MSI_DESCS		= (1 << 10),
 };
 
 int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask,
-- 
cgit v1.2.3


From 7ad321a5eadb52b4af1c577dda51783e08235ea7 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:51:37 +0100
Subject: soc: ti: ti_sci_inta_msi: Remove ti_sci_inta_msi_domain_free_irqs()

The function has no users and is pointless now that the core frees the MSI
descriptors, which means potential users can just use msi_domain_free_irqs().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211206210748.793119155@linutronix.de
---
 include/linux/soc/ti/ti_sci_inta_msi.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/soc/ti/ti_sci_inta_msi.h b/include/linux/soc/ti/ti_sci_inta_msi.h
index 25ea78a8ea5c..4dba2f2aff6f 100644
--- a/include/linux/soc/ti/ti_sci_inta_msi.h
+++ b/include/linux/soc/ti/ti_sci_inta_msi.h
@@ -18,5 +18,4 @@ struct irq_domain
 				   struct irq_domain *parent);
 int ti_sci_inta_msi_domain_alloc_irqs(struct device *dev,
 				      struct ti_sci_resource *res);
-void ti_sci_inta_msi_domain_free_irqs(struct device *dev);
 #endif /* __INCLUDE_LINUX_IRQCHIP_TI_SCI_INTA_H */
-- 
cgit v1.2.3


From ef8dd01538ea2553ab101ddce6a85a321406d9c0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:51:44 +0100
Subject: genirq/msi: Make interrupt allocation less convoluted

There is no real reason to do several loops over the MSI descriptors
instead of just doing one loop. In case of an error everything is undone
anyway so it does not matter whether it's a partial or a full rollback.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211206210749.010234767@linutronix.de
---
 include/linux/msi.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 17e47ab8d57a..e8dd0be17e89 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -206,12 +206,6 @@ struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter);
 	list_for_each_entry((desc), dev_to_msi_list((dev)), list)
 #define for_each_msi_entry_safe(desc, tmp, dev)	\
 	list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list)
-#define for_each_msi_vector(desc, __irq, dev)				\
-	for_each_msi_entry((desc), (dev))				\
-		if ((desc)->irq)					\
-			for (__irq = (desc)->irq;			\
-			     __irq < ((desc)->irq + (desc)->nvec_used);	\
-			     __irq++)
 
 #ifdef CONFIG_IRQ_MSI_IOMMU
 static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc)
-- 
cgit v1.2.3


From cc9a246dbf6bdef56d9eee296a1db52dd0607976 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:51:47 +0100
Subject: genirq/msi: Mop up old interfaces

Get rid of the old iterators, alloc/free functions and adjust the core code
accordingly.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211206210749.117395027@linutronix.de
---
 include/linux/msi.h | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index e8dd0be17e89..b54010ba7b0d 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -197,15 +197,7 @@ struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter);
 	for ((desc) = msi_first_desc((dev), (filter)); (desc);	\
 	     (desc) = msi_next_desc((dev), (filter)))
 
-/* Helpers to hide struct msi_desc implementation details */
 #define msi_desc_to_dev(desc)		((desc)->dev)
-#define dev_to_msi_list(dev)		(&(dev)->msi.data->list)
-#define first_msi_entry(dev)		\
-	list_first_entry(dev_to_msi_list((dev)), struct msi_desc, list)
-#define for_each_msi_entry(desc, dev)	\
-	list_for_each_entry((desc), dev_to_msi_list((dev)), list)
-#define for_each_msi_entry_safe(desc, tmp, dev)	\
-	list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list)
 
 #ifdef CONFIG_IRQ_MSI_IOMMU
 static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc)
@@ -231,10 +223,6 @@ static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc,
 #endif
 
 #ifdef CONFIG_PCI_MSI
-#define first_pci_msi_entry(pdev)	first_msi_entry(&(pdev)->dev)
-#define for_each_pci_msi_entry(desc, pdev)	\
-	for_each_msi_entry((desc), &(pdev)->dev)
-
 struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc);
 void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg);
 #else /* CONFIG_PCI_MSI */
@@ -256,9 +244,6 @@ static inline void msi_free_msi_descs(struct device *dev)
 	msi_free_msi_descs_range(dev, MSI_DESC_ALL, 0, MSI_MAX_INDEX);
 }
 
-struct msi_desc *alloc_msi_entry(struct device *dev, int nvec,
-				 const struct irq_affinity_desc *affinity);
-void free_msi_entry(struct msi_desc *entry);
 void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
 void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
 
-- 
cgit v1.2.3


From ef3350c53d2aac65cf1c4ecc968bbb1de5f421ea Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:51:49 +0100
Subject: genirq/msi: Add abuse prevention comment to msi header

Hope dies last.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211206210749.170847844@linutronix.de
---
 include/linux/msi.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index b54010ba7b0d..70cc6a555a8e 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -2,6 +2,20 @@
 #ifndef LINUX_MSI_H
 #define LINUX_MSI_H
 
+/*
+ * This header file contains MSI data structures and functions which are
+ * only relevant for:
+ *	- Interrupt core code
+ *	- PCI/MSI core code
+ *	- MSI interrupt domain implementations
+ *	- IOMMU, low level VFIO, NTB and other justified exceptions
+ *	  dealing with low level MSI details.
+ *
+ * Regular device drivers have no business with any of these functions and
+ * especially storing MSI descriptor pointers in random code is considered
+ * abuse. The only function which is relevant for drivers is msi_get_virq().
+ */
+
 #include <linux/cpumask.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
-- 
cgit v1.2.3


From bf5e758f02fc739589dcc6a3395c3a3eb77b5c90 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:51:50 +0100
Subject: genirq/msi: Simplify sysfs handling

The sysfs handling for MSI is a convoluted maze and it is in the way of
supporting dynamic expansion of the MSI-X vectors because it only supports
a one off bulk population/free of the sysfs entries.

Change it to do:

   1) Creating an empty sysfs attribute group when msi_device_data is
      allocated

   2) Populate the entries when the MSI descriptor is initialized

   3) Free the entries when a MSI descriptor is detached from a Linux
      interrupt.

   4) Provide functions for the legacy non-irqdomain fallback code to
      do a bulk population/free. This code won't support dynamic
      expansion.

This makes the code simpler and reduces the number of allocations as the
empty attribute group can be shared.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211206210749.224917330@linutronix.de
---
 include/linux/msi.h | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 70cc6a555a8e..1a00367d2cfa 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -71,7 +71,7 @@ struct irq_data;
 struct msi_desc;
 struct pci_dev;
 struct platform_msi_priv_data;
-struct attribute_group;
+struct device_attribute;
 
 void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
 #ifdef CONFIG_GENERIC_MSI_IRQ
@@ -129,6 +129,7 @@ struct pci_msi_desc {
  * @dev:	Pointer to the device which uses this descriptor
  * @msg:	The last set MSI message cached for reuse
  * @affinity:	Optional pointer to a cpu affinity mask for this descriptor
+ * @sysfs_attr:	Pointer to sysfs device attribute
  *
  * @write_msi_msg:	Callback that may be called when the MSI message
  *			address or data changes
@@ -148,6 +149,9 @@ struct msi_desc {
 #ifdef CONFIG_IRQ_MSI_IOMMU
 	const void			*iommu_cookie;
 #endif
+#ifdef CONFIG_SYSFS
+	struct device_attribute		*sysfs_attrs;
+#endif
 
 	void (*write_msi_msg)(struct msi_desc *entry, void *data);
 	void *write_msi_msg_data;
@@ -171,7 +175,6 @@ enum msi_desc_filter {
 /**
  * msi_device_data - MSI per device data
  * @properties:		MSI properties which are interesting to drivers
- * @attrs:		Pointer to the sysfs attribute group
  * @platform_data:	Platform-MSI specific data
  * @list:		List of MSI descriptors associated to the device
  * @mutex:		Mutex protecting the MSI list
@@ -179,7 +182,6 @@ enum msi_desc_filter {
  */
 struct msi_device_data {
 	unsigned long			properties;
-	const struct attribute_group    **attrs;
 	struct platform_msi_priv_data	*platform_data;
 	struct list_head		list;
 	struct mutex			mutex;
@@ -264,14 +266,6 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
 void pci_msi_mask_irq(struct irq_data *data);
 void pci_msi_unmask_irq(struct irq_data *data);
 
-#ifdef CONFIG_SYSFS
-int msi_device_populate_sysfs(struct device *dev);
-void msi_device_destroy_sysfs(struct device *dev);
-#else /* CONFIG_SYSFS */
-static inline int msi_device_populate_sysfs(struct device *dev) { return 0; }
-static inline void msi_device_destroy_sysfs(struct device *dev) { }
-#endif /* !CONFIG_SYSFS */
-
 /*
  * The arch hooks to setup up msi irqs. Default functions are implemented
  * as weak symbols so that they /can/ be overriden by architecture specific
@@ -285,6 +279,13 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc);
 void arch_teardown_msi_irq(unsigned int irq);
 int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
 void arch_teardown_msi_irqs(struct pci_dev *dev);
+#ifdef CONFIG_SYSFS
+int msi_device_populate_sysfs(struct device *dev);
+void msi_device_destroy_sysfs(struct device *dev);
+#else /* CONFIG_SYSFS */
+static inline int msi_device_populate_sysfs(struct device *dev) { return 0; }
+static inline void msi_device_destroy_sysfs(struct device *dev) { }
+#endif /* !CONFIG_SYSFS */
 #endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */
 
 /*
-- 
cgit v1.2.3


From cd6cf06590b9792340dceaa285138777f3cc4d90 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Dec 2021 23:51:52 +0100
Subject: genirq/msi: Convert storage to xarray

The current linked list storage for MSI descriptors is suboptimal in
several ways:

  1) Looking up a MSI desciptor requires a O(n) list walk in the worst case

  2) The upcoming support of runtime expansion of MSI-X vectors would need
     to do a full list walk to figure out whether a particular index is
     already associated.

  3) Runtime expansion of sparse allocations is even more complex as the
     current implementation assumes an ordered list (increasing MSI index).

Use an xarray which solves all of the above problems nicely.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20211206210749.280627070@linutronix.de
---
 include/linux/msi.h | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 1a00367d2cfa..fc918a658d48 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -17,6 +17,7 @@
  */
 
 #include <linux/cpumask.h>
+#include <linux/xarray.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <asm/msi.h>
@@ -123,7 +124,6 @@ struct pci_msi_desc {
 
 /**
  * struct msi_desc - Descriptor structure for MSI based interrupts
- * @list:	List head for management
  * @irq:	The base interrupt number
  * @nvec_used:	The number of vectors used
  * @dev:	Pointer to the device which uses this descriptor
@@ -140,7 +140,6 @@ struct pci_msi_desc {
  */
 struct msi_desc {
 	/* Shared device/bus type independent data */
-	struct list_head		list;
 	unsigned int			irq;
 	unsigned int			nvec_used;
 	struct device			*dev;
@@ -176,16 +175,16 @@ enum msi_desc_filter {
  * msi_device_data - MSI per device data
  * @properties:		MSI properties which are interesting to drivers
  * @platform_data:	Platform-MSI specific data
- * @list:		List of MSI descriptors associated to the device
- * @mutex:		Mutex protecting the MSI list
- * @__next:		Cached pointer to the next entry for iterators
+ * @mutex:		Mutex protecting the MSI descriptor store
+ * @__store:		Xarray for storing MSI descriptor pointers
+ * @__iter_idx:		Index to search the next entry for iterators
  */
 struct msi_device_data {
 	unsigned long			properties;
 	struct platform_msi_priv_data	*platform_data;
-	struct list_head		list;
 	struct mutex			mutex;
-	struct msi_desc			*__next;
+	struct xarray			__store;
+	unsigned long			__iter_idx;
 };
 
 int msi_setup_device_data(struct device *dev);
-- 
cgit v1.2.3


From 60f20d84dc813f1342771a3e4f06d89da26dc412 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 18 Nov 2021 12:12:10 -0600
Subject: of/fdt: Rework early_init_dt_scan_chosen() to call directly

Use of the of_scan_flat_dt() function predates libfdt and is discouraged
as libfdt provides a nicer set of APIs. Rework
early_init_dt_scan_chosen() to be called directly and use libfdt.

Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-by: Frank Rowand <frank.rowand@sony.com>
Link: https://lore.kernel.org/r/20211118181213.1433346-2-robh@kernel.org
---
 include/linux/of_fdt.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index cf48983d3c86..654722235df6 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -58,8 +58,7 @@ extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
 extern unsigned long of_get_flat_dt_root(void);
 extern uint32_t of_get_flat_dt_phandle(unsigned long node);
 
-extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
-				     int depth, void *data);
+extern int early_init_dt_scan_chosen(char *cmdline);
 extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
 				     int depth, void *data);
 extern int early_init_dt_scan_chosen_stdout(void);
-- 
cgit v1.2.3


From d665881d2171b62ca1ea23be89be6f2a8a330bb2 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 18 Nov 2021 12:12:11 -0600
Subject: of/fdt: Rework early_init_dt_scan_root() to call directly

Use of the of_scan_flat_dt() function predates libfdt and is discouraged
as libfdt provides a nicer set of APIs. Rework early_init_dt_scan_root()
to be called directly and use libfdt.

Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-by: Frank Rowand <frank.rowand@sony.com>
Link: https://lore.kernel.org/r/20211118181213.1433346-3-robh@kernel.org
---
 include/linux/of_fdt.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 654722235df6..df3d31926c3c 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -68,8 +68,7 @@ extern void early_init_dt_add_memory_arch(u64 base, u64 size);
 extern u64 dt_mem_next_cell(int s, const __be32 **cellp);
 
 /* Early flat tree scan hooks */
-extern int early_init_dt_scan_root(unsigned long node, const char *uname,
-				   int depth, void *data);
+extern int early_init_dt_scan_root(void);
 
 extern bool early_init_dt_scan(void *params);
 extern bool early_init_dt_verify(void *params);
-- 
cgit v1.2.3


From 1f012283e9360fb4007308f04cfaeb205e34b684 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 15 Dec 2021 09:01:02 -0600
Subject: of/fdt: Rework early_init_dt_scan_memory() to call directly

Use of the of_scan_flat_dt() function predates libfdt and is discouraged
as libfdt provides a nicer set of APIs. Rework
early_init_dt_scan_memory() to be called directly and use libfdt.

Cc: John Crispin <john@phrozen.org>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: linux-mips@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Reviewed-by: Frank Rowand <frank.rowand@sony.com>
Signed-off-by: Rob Herring <robh@kernel.org>
Tested-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20211215150102.1303588-1-robh@kernel.org
---
 include/linux/of_fdt.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index df3d31926c3c..914739f3c192 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -59,8 +59,7 @@ extern unsigned long of_get_flat_dt_root(void);
 extern uint32_t of_get_flat_dt_phandle(unsigned long node);
 
 extern int early_init_dt_scan_chosen(char *cmdline);
-extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
-				     int depth, void *data);
+extern int early_init_dt_scan_memory(void);
 extern int early_init_dt_scan_chosen_stdout(void);
 extern void early_init_fdt_scan_reserved_mem(void);
 extern void early_init_fdt_reserve_self(void);
-- 
cgit v1.2.3


From f7ea534a0920dbaf71a8003936e178e14ec9271d Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 15 Dec 2021 18:55:36 -0800
Subject: add includes masked by cgroup -> bpf dependency

cgroup pulls in BPF which pulls in a lot of includes.
We're about to break that chain so fix those who were
depending on it.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211216025538.1649516-2-kuba@kernel.org
---
 include/linux/perf_event.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0dcfd265beed..4a021149eaf0 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -611,6 +611,7 @@ struct swevent_hlist {
 #define PERF_ATTACH_SCHED_CB	0x20
 #define PERF_ATTACH_CHILD	0x40
 
+struct bpf_prog;
 struct perf_cgroup;
 struct perf_buffer;
 
-- 
cgit v1.2.3


From fd1740b6abac39f68ce12e201697f106e0f1d519 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 15 Dec 2021 18:55:38 -0800
Subject: bpf: Remove the cgroup -> bpf header dependecy

Remove the dependency from cgroup-defs.h to bpf-cgroup.h and bpf.h.
This reduces the incremental build size of x86 allmodconfig after
bpf.h was touched from ~17k objects rebuilt to ~5k objects.
bpf.h is 2.2kLoC and is modified relatively often.

We need a new header with just the definition of struct cgroup_bpf
and enum cgroup_bpf_attach_type, this is akin to cgroup-defs.h.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/bpf/20211216025538.1649516-4-kuba@kernel.org
---
 include/linux/bpf-cgroup-defs.h | 70 +++++++++++++++++++++++++++++++++++++++++
 include/linux/bpf-cgroup.h      | 57 +--------------------------------
 include/linux/cgroup-defs.h     |  2 +-
 3 files changed, 72 insertions(+), 57 deletions(-)
 create mode 100644 include/linux/bpf-cgroup-defs.h

(limited to 'include')

diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
new file mode 100644
index 000000000000..695d1224a71b
--- /dev/null
+++ b/include/linux/bpf-cgroup-defs.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BPF_CGROUP_DEFS_H
+#define _BPF_CGROUP_DEFS_H
+
+#ifdef CONFIG_CGROUP_BPF
+
+#include <linux/list.h>
+#include <linux/percpu-refcount.h>
+#include <linux/workqueue.h>
+
+struct bpf_prog_array;
+
+enum cgroup_bpf_attach_type {
+	CGROUP_BPF_ATTACH_TYPE_INVALID = -1,
+	CGROUP_INET_INGRESS = 0,
+	CGROUP_INET_EGRESS,
+	CGROUP_INET_SOCK_CREATE,
+	CGROUP_SOCK_OPS,
+	CGROUP_DEVICE,
+	CGROUP_INET4_BIND,
+	CGROUP_INET6_BIND,
+	CGROUP_INET4_CONNECT,
+	CGROUP_INET6_CONNECT,
+	CGROUP_INET4_POST_BIND,
+	CGROUP_INET6_POST_BIND,
+	CGROUP_UDP4_SENDMSG,
+	CGROUP_UDP6_SENDMSG,
+	CGROUP_SYSCTL,
+	CGROUP_UDP4_RECVMSG,
+	CGROUP_UDP6_RECVMSG,
+	CGROUP_GETSOCKOPT,
+	CGROUP_SETSOCKOPT,
+	CGROUP_INET4_GETPEERNAME,
+	CGROUP_INET6_GETPEERNAME,
+	CGROUP_INET4_GETSOCKNAME,
+	CGROUP_INET6_GETSOCKNAME,
+	CGROUP_INET_SOCK_RELEASE,
+	MAX_CGROUP_BPF_ATTACH_TYPE
+};
+
+struct cgroup_bpf {
+	/* array of effective progs in this cgroup */
+	struct bpf_prog_array __rcu *effective[MAX_CGROUP_BPF_ATTACH_TYPE];
+
+	/* attached progs to this cgroup and attach flags
+	 * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will
+	 * have either zero or one element
+	 * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
+	 */
+	struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE];
+	u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE];
+
+	/* list of cgroup shared storages */
+	struct list_head storages;
+
+	/* temp storage for effective prog array used by prog_attach/detach */
+	struct bpf_prog_array *inactive;
+
+	/* reference counter used to detach bpf programs after cgroup removal */
+	struct percpu_ref refcnt;
+
+	/* cgroup_bpf is released using a work queue */
+	struct work_struct release_work;
+};
+
+#else /* CONFIG_CGROUP_BPF */
+struct cgroup_bpf {};
+#endif /* CONFIG_CGROUP_BPF */
+
+#endif
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 11820a430d6c..b525d8cdc25b 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -3,10 +3,10 @@
 #define _BPF_CGROUP_H
 
 #include <linux/bpf.h>
+#include <linux/bpf-cgroup-defs.h>
 #include <linux/errno.h>
 #include <linux/jump_label.h>
 #include <linux/percpu.h>
-#include <linux/percpu-refcount.h>
 #include <linux/rbtree.h>
 #include <uapi/linux/bpf.h>
 
@@ -23,33 +23,6 @@ struct ctl_table_header;
 struct task_struct;
 
 #ifdef CONFIG_CGROUP_BPF
-enum cgroup_bpf_attach_type {
-	CGROUP_BPF_ATTACH_TYPE_INVALID = -1,
-	CGROUP_INET_INGRESS = 0,
-	CGROUP_INET_EGRESS,
-	CGROUP_INET_SOCK_CREATE,
-	CGROUP_SOCK_OPS,
-	CGROUP_DEVICE,
-	CGROUP_INET4_BIND,
-	CGROUP_INET6_BIND,
-	CGROUP_INET4_CONNECT,
-	CGROUP_INET6_CONNECT,
-	CGROUP_INET4_POST_BIND,
-	CGROUP_INET6_POST_BIND,
-	CGROUP_UDP4_SENDMSG,
-	CGROUP_UDP6_SENDMSG,
-	CGROUP_SYSCTL,
-	CGROUP_UDP4_RECVMSG,
-	CGROUP_UDP6_RECVMSG,
-	CGROUP_GETSOCKOPT,
-	CGROUP_SETSOCKOPT,
-	CGROUP_INET4_GETPEERNAME,
-	CGROUP_INET6_GETPEERNAME,
-	CGROUP_INET4_GETSOCKNAME,
-	CGROUP_INET6_GETSOCKNAME,
-	CGROUP_INET_SOCK_RELEASE,
-	MAX_CGROUP_BPF_ATTACH_TYPE
-};
 
 #define CGROUP_ATYPE(type) \
 	case BPF_##type: return type
@@ -127,33 +100,6 @@ struct bpf_prog_list {
 	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 };
 
-struct bpf_prog_array;
-
-struct cgroup_bpf {
-	/* array of effective progs in this cgroup */
-	struct bpf_prog_array __rcu *effective[MAX_CGROUP_BPF_ATTACH_TYPE];
-
-	/* attached progs to this cgroup and attach flags
-	 * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will
-	 * have either zero or one element
-	 * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
-	 */
-	struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE];
-	u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE];
-
-	/* list of cgroup shared storages */
-	struct list_head storages;
-
-	/* temp storage for effective prog array used by prog_attach/detach */
-	struct bpf_prog_array *inactive;
-
-	/* reference counter used to detach bpf programs after cgroup removal */
-	struct percpu_ref refcnt;
-
-	/* cgroup_bpf is released using a work queue */
-	struct work_struct release_work;
-};
-
 int cgroup_bpf_inherit(struct cgroup *cgrp);
 void cgroup_bpf_offline(struct cgroup *cgrp);
 
@@ -451,7 +397,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
 			  union bpf_attr __user *uattr);
 #else
 
-struct cgroup_bpf {};
 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
 static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
 
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index db2e147e069f..411684c80cf3 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -19,7 +19,7 @@
 #include <linux/percpu-rwsem.h>
 #include <linux/u64_stats_sync.h>
 #include <linux/workqueue.h>
-#include <linux/bpf-cgroup.h>
+#include <linux/bpf-cgroup-defs.h>
 #include <linux/psi_types.h>
 
 #ifdef CONFIG_CGROUPS
-- 
cgit v1.2.3


From 0f55f9ed21f96630c6ec96805d42f92c0b458b37 Mon Sep 17 00:00:00 2001
From: Christy Lee <christylee@fb.com>
Date: Thu, 16 Dec 2021 13:33:56 -0800
Subject: bpf: Only print scratched registers and stack slots to verifier logs.

When printing verifier state for any log level, print full verifier
state only on function calls or on errors. Otherwise, only print the
registers and stack slots that were accessed.

Log size differences:

verif_scale_loop6 before: 234566564
verif_scale_loop6 after: 72143943
69% size reduction

kfree_skb before: 166406
kfree_skb after: 55386
69% size reduction

Before:

156: (61) r0 = *(u32 *)(r1 +0)
157: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1=ctx(id=0,off=0,imm=0) R2_w=invP0 R10=fp0 fp-8_w=00000000 fp-16_w=00\
000000 fp-24_w=00000000 fp-32_w=00000000 fp-40_w=00000000 fp-48_w=00000000 fp-56_w=00000000 fp-64_w=00000000 fp-72_w=00000000 fp-80_w=00000\
000 fp-88_w=00000000 fp-96_w=00000000 fp-104_w=00000000 fp-112_w=00000000 fp-120_w=00000000 fp-128_w=00000000 fp-136_w=00000000 fp-144_w=00\
000000 fp-152_w=00000000 fp-160_w=00000000 fp-168_w=00000000 fp-176_w=00000000 fp-184_w=00000000 fp-192_w=00000000 fp-200_w=00000000 fp-208\
_w=00000000 fp-216_w=00000000 fp-224_w=00000000 fp-232_w=00000000 fp-240_w=00000000 fp-248_w=00000000 fp-256_w=00000000 fp-264_w=00000000 f\
p-272_w=00000000 fp-280_w=00000000 fp-288_w=00000000 fp-296_w=00000000 fp-304_w=00000000 fp-312_w=00000000 fp-320_w=00000000 fp-328_w=00000\
000 fp-336_w=00000000 fp-344_w=00000000 fp-352_w=00000000 fp-360_w=00000000 fp-368_w=00000000 fp-376_w=00000000 fp-384_w=00000000 fp-392_w=\
00000000 fp-400_w=00000000 fp-408_w=00000000 fp-416_w=00000000 fp-424_w=00000000 fp-432_w=00000000 fp-440_w=00000000 fp-448_w=00000000
; return skb->len;
157: (95) exit
Func#4 is safe for any args that match its prototype
Validating get_constant() func#5...
158: R1=invP(id=0) R10=fp0
; int get_constant(long val)
158: (bf) r0 = r1
159: R0_w=invP(id=1) R1=invP(id=1) R10=fp0
; return val - 122;
159: (04) w0 += -122
160: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1=invP(id=1) R10=fp0
; return val - 122;
160: (95) exit
Func#5 is safe for any args that match its prototype
Validating get_skb_ifindex() func#6...
161: R1=invP(id=0) R2=ctx(id=0,off=0,imm=0) R3=invP(id=0) R10=fp0
; int get_skb_ifindex(int val, struct __sk_buff *skb, int var)
161: (bc) w0 = w3
162: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1=invP(id=0) R2=ctx(id=0,off=0,imm=0) R3=invP(id=0) R10=fp0

After:

156: (61) r0 = *(u32 *)(r1 +0)
157: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1=ctx(id=0,off=0,imm=0)
; return skb->len;
157: (95) exit
Func#4 is safe for any args that match its prototype
Validating get_constant() func#5...
158: R1=invP(id=0) R10=fp0
; int get_constant(long val)
158: (bf) r0 = r1
159: R0_w=invP(id=1) R1=invP(id=1)
; return val - 122;
159: (04) w0 += -122
160: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
; return val - 122;
160: (95) exit
Func#5 is safe for any args that match its prototype
Validating get_skb_ifindex() func#6...
161: R1=invP(id=0) R2=ctx(id=0,off=0,imm=0) R3=invP(id=0) R10=fp0
; int get_skb_ifindex(int val, struct __sk_buff *skb, int var)
161: (bc) w0 = w3
162: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R3=invP(id=0)

Signed-off-by: Christy Lee <christylee@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211216213358.3374427-2-christylee@fb.com
---
 include/linux/bpf_verifier.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 182b16a91084..c66f238c538d 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -474,6 +474,13 @@ struct bpf_verifier_env {
 	/* longest register parentage chain walked for liveness marking */
 	u32 longest_mark_read_walk;
 	bpfptr_t fd_array;
+
+	/* bit mask to keep track of whether a register has been accessed
+	 * since the last time the function state was printed
+	 */
+	u32 scratched_regs;
+	/* Same as scratched_regs but for stack slots */
+	u64 scratched_stack_slots;
 };
 
 __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
-- 
cgit v1.2.3


From b77beaaee1be62a4623dd7142868c2e1180d8288 Mon Sep 17 00:00:00 2001
From: Adam Ford <aford173@gmail.com>
Date: Tue, 14 Dec 2021 18:46:20 -0600
Subject: dt-bindings: power: imx8mn: add defines for DISP blk-ctrl domains

This adds the defines for the power domains provided by the DISP
blk-ctrl.

Signed-off-by: Adam Ford <aford173@gmail.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 include/dt-bindings/power/imx8mn-power.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/power/imx8mn-power.h b/include/dt-bindings/power/imx8mn-power.h
index 102ee85a9b62..eedd0e581939 100644
--- a/include/dt-bindings/power/imx8mn-power.h
+++ b/include/dt-bindings/power/imx8mn-power.h
@@ -12,4 +12,9 @@
 #define IMX8MN_POWER_DOMAIN_DISPMIX	3
 #define IMX8MN_POWER_DOMAIN_MIPI	4
 
+#define IMX8MN_DISPBLK_PD_MIPI_DSI	0
+#define IMX8MN_DISPBLK_PD_MIPI_CSI	1
+#define IMX8MN_DISPBLK_PD_LCDIF	2
+#define IMX8MN_DISPBLK_PD_ISI	3
+
 #endif
-- 
cgit v1.2.3


From 2e5766483c8c5cf886b4dc647a1741738dde7d79 Mon Sep 17 00:00:00 2001
From: Christy Lee <christylee@fb.com>
Date: Thu, 16 Dec 2021 19:42:45 -0800
Subject: bpf: Right align verifier states in verifier logs.

Make the verifier logs more readable, print the verifier states
on the corresponding instruction line. If the previous line was
not a bpf instruction, then print the verifier states on its own
line.

Before:

Validating test_pkt_access_subprog3() func#3...
86: R1=invP(id=0) R2=ctx(id=0,off=0,imm=0) R10=fp0
; int test_pkt_access_subprog3(int val, struct __sk_buff *skb)
86: (bf) r6 = r2
87: R2=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0)
87: (bc) w7 = w1
88: R1=invP(id=0) R7_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
; return get_skb_len(skb) * get_skb_ifindex(val, skb, get_constant(123));
88: (bf) r1 = r6
89: R1_w=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0)
89: (85) call pc+9
Func#4 is global and valid. Skipping.
90: R0_w=invP(id=0)
90: (bc) w8 = w0
91: R0_w=invP(id=0) R8_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
; return get_skb_len(skb) * get_skb_ifindex(val, skb, get_constant(123));
91: (b7) r1 = 123
92: R1_w=invP123
92: (85) call pc+65
Func#5 is global and valid. Skipping.
93: R0=invP(id=0)

After:

86: R1=invP(id=0) R2=ctx(id=0,off=0,imm=0) R10=fp0
; int test_pkt_access_subprog3(int val, struct __sk_buff *skb)
86: (bf) r6 = r2                      ; R2=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0)
87: (bc) w7 = w1                      ; R1=invP(id=0) R7_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
; return get_skb_len(skb) * get_skb_ifindex(val, skb, get_constant(123));
88: (bf) r1 = r6                      ; R1_w=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0)
89: (85) call pc+9
Func#4 is global and valid. Skipping.
90: R0_w=invP(id=0)
90: (bc) w8 = w0                      ; R0_w=invP(id=0) R8_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
; return get_skb_len(skb) * get_skb_ifindex(val, skb, get_constant(123));
91: (b7) r1 = 123                     ; R1_w=invP123
92: (85) call pc+65
Func#5 is global and valid. Skipping.
93: R0=invP(id=0)

Signed-off-by: Christy Lee <christylee@fb.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index c66f238c538d..ee931398f311 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -388,6 +388,8 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
 #define BPF_LOG_LEVEL	(BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2)
 #define BPF_LOG_MASK	(BPF_LOG_LEVEL | BPF_LOG_STATS)
 #define BPF_LOG_KERNEL	(BPF_LOG_MASK + 1) /* kernel internal flag */
+#define BPF_LOG_MIN_ALIGNMENT 8U
+#define BPF_LOG_ALIGNMENT 40U
 
 static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
 {
@@ -481,6 +483,7 @@ struct bpf_verifier_env {
 	u32 scratched_regs;
 	/* Same as scratched_regs but for stack slots */
 	u64 scratched_stack_slots;
+	u32 prev_log_len, prev_insn_print_len;
 };
 
 __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
-- 
cgit v1.2.3


From bdecfceffeeb9000e78b0f613069f5c06974b347 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 22 Nov 2021 23:21:54 +0100
Subject: ASoC: dai_dma: remove slave_id field

This field is no longer set from any driver now, so remove the
last references as well.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20211122222203.4103644-3-arnd@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/sound/dmaengine_pcm.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h
index 96666efddb39..38ea046e653c 100644
--- a/include/sound/dmaengine_pcm.h
+++ b/include/sound/dmaengine_pcm.h
@@ -60,7 +60,6 @@ struct dma_chan *snd_dmaengine_pcm_get_chan(struct snd_pcm_substream *substream)
  * @maxburst: Maximum number of words(note: words, as in units of the
  * src_addr_width member, not bytes) that can be send to or received from the
  * DAI in one burst.
- * @slave_id: Slave requester id for the DMA channel.
  * @filter_data: Custom DMA channel filter data, this will usually be used when
  * requesting the DMA channel.
  * @chan_name: Custom channel name to use when requesting DMA channel.
@@ -74,7 +73,6 @@ struct snd_dmaengine_dai_dma_data {
 	dma_addr_t addr;
 	enum dma_slave_buswidth addr_width;
 	u32 maxburst;
-	unsigned int slave_id;
 	void *filter_data;
 	const char *chan_name;
 	unsigned int fifo_size;
-- 
cgit v1.2.3


From 03de6b273805b3c552ff158f8688555937375926 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 22 Nov 2021 23:22:00 +0100
Subject: dmaengine: qcom-adm: stop abusing slave_id config

The slave_id was previously used to pick one DMA slave instead of another,
but this is now done through the DMA descriptors in device tree.

For the qcom_adm driver, the configuration is documented in the DT
binding to contain a tuple of device identifier and a "crci" field,
but the implementation ends up using only a single cell for identifying
the slave, with the crci getting passed in nonstandard properties of
the device, and passed through the dma driver using the old slave_id
field. Part of the problem apparently is that the nand driver ends up
using only a single DMA request ID, but requires distinct values for
"crci" depending on the type of transfer.

Change both the dmaengine driver and the two slave drivers to allow
the documented binding to work in addition to the ad-hoc passing
of crci values. In order to no longer abuse the slave_id field, pass
the data using the "peripheral_config" mechanism instead.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20211122222203.4103644-9-arnd@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/dma/qcom_adm.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 include/linux/dma/qcom_adm.h

(limited to 'include')

diff --git a/include/linux/dma/qcom_adm.h b/include/linux/dma/qcom_adm.h
new file mode 100644
index 000000000000..af20df674f0c
--- /dev/null
+++ b/include/linux/dma/qcom_adm.h
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef __LINUX_DMA_QCOM_ADM_H
+#define __LINUX_DMA_QCOM_ADM_H
+
+#include <linux/types.h>
+
+struct qcom_adm_peripheral_config {
+	u32 crci;
+	u32 mux;
+};
+
+#endif /* __LINUX_DMA_QCOM_ADM_H */
-- 
cgit v1.2.3


From 93cdb5b0dc56cc7a8b87a61146495f3bdc93d7ba Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 22 Nov 2021 23:22:01 +0100
Subject: dmaengine: xilinx_dpdma: stop using slave_id field

The display driver wants to pass a custom flag to the DMA engine driver,
which it started doing by using the slave_id field that was traditionally
used for a different purpose.

As there is no longer a correct use for the slave_id field, it should
really be removed, and the remaining users changed over to something
different.

The new mechanism for passing nonstandard settings is using the
.peripheral_config field, so use that to pass a newly defined structure
here, making it clear that this will not work in portable drivers.

Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20211122222203.4103644-10-arnd@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/dma/xilinx_dpdma.h | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 include/linux/dma/xilinx_dpdma.h

(limited to 'include')

diff --git a/include/linux/dma/xilinx_dpdma.h b/include/linux/dma/xilinx_dpdma.h
new file mode 100644
index 000000000000..83a1377f03f8
--- /dev/null
+++ b/include/linux/dma/xilinx_dpdma.h
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __LINUX_DMA_XILINX_DPDMA_H
+#define __LINUX_DMA_XILINX_DPDMA_H
+
+#include <linux/types.h>
+
+struct xilinx_dpdma_peripheral_config {
+	bool video_group;
+};
+
+#endif /* __LINUX_DMA_XILINX_DPDMA_H */
-- 
cgit v1.2.3


From 3c219644075795a99271d345efdfa8b256e55161 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 22 Nov 2021 23:22:03 +0100
Subject: dmaengine: remove slave_id config field

All references to the slave_id field have been removed, so remove the
field as well to prevent new references from creeping in again.

Originally this allowed slave DMA drivers to configure which device
is accessed with the dmaengine_slave_config() call, but this was
inconsistent, as the same information is also passed while requesting
a channel, and never changes in practice.

In modern kernels, the device is always selected when requesting
the channel, so the .slave_id field is no longer useful.

Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20211122222203.4103644-12-arnd@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/dmaengine.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 9000f3ffce8b..0349b35235e6 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -418,9 +418,6 @@ enum dma_slave_buswidth {
  * @device_fc: Flow Controller Settings. Only valid for slave channels. Fill
  * with 'true' if peripheral should be flow controller. Direction will be
  * selected at Runtime.
- * @slave_id: Slave requester id. Only valid for slave channels. The dma
- * slave peripheral will have unique id as dma requester which need to be
- * pass as slave config.
  * @peripheral_config: peripheral configuration for programming peripheral
  * for dmaengine transfer
  * @peripheral_size: peripheral configuration buffer size
@@ -448,7 +445,6 @@ struct dma_slave_config {
 	u32 src_port_window_size;
 	u32 dst_port_window_size;
 	bool device_fc;
-	unsigned int slave_id;
 	void *peripheral_config;
 	size_t peripheral_size;
 };
-- 
cgit v1.2.3


From 3d725965f836a7acbd1674e33644bec18373de53 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 7 Dec 2021 15:33:06 -0800
Subject: crypto: ccp - Add SEV_INIT_EX support

Add new module parameter to allow users to use SEV_INIT_EX instead of
SEV_INIT. This helps users who lock their SPI bus to use the PSP for SEV
functionality. The 'init_ex_path' parameter defaults to NULL which means
the kernel will use SEV_INIT, if a path is specified SEV_INIT_EX will be
used with the data found at the path. On certain PSP commands this
file is written to as the PSP updates the NV memory region. Depending on
file system initialization this file open may fail during module init
but the CCP driver for SEV already has sufficient retries for platform
initialization. During normal operation of PSP system and SEV commands
if the PSP has not been initialized it is at run time. If the file at
'init_ex_path' does not exist the PSP will not be initialized. The user
must create the file prior to use with 32Kb of 0xFFs per spec.

Signed-off-by: David Rientjes <rientjes@google.com>
Co-developed-by: Peter Gonda <pgonda@google.com>
Signed-off-by: Peter Gonda <pgonda@google.com>
Reviewed-by: Marc Orr <marcorr@google.com>
Reported-by: kernel test robot <lkp@intel.com>
Acked-by: Brijesh Singh <brijesh.singh@amd.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Marc Orr <marcorr@google.com>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: David Rientjes <rientjes@google.com>
Cc: John Allen <john.allen@amd.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/psp-sev.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h
index d48a7192e881..1595088c428b 100644
--- a/include/linux/psp-sev.h
+++ b/include/linux/psp-sev.h
@@ -52,6 +52,7 @@ enum sev_cmd {
 	SEV_CMD_DF_FLUSH		= 0x00A,
 	SEV_CMD_DOWNLOAD_FIRMWARE	= 0x00B,
 	SEV_CMD_GET_ID			= 0x00C,
+	SEV_CMD_INIT_EX                 = 0x00D,
 
 	/* Guest commands */
 	SEV_CMD_DECOMMISSION		= 0x020,
@@ -102,6 +103,26 @@ struct sev_data_init {
 	u32 tmr_len;			/* In */
 } __packed;
 
+/**
+ * struct sev_data_init_ex - INIT_EX command parameters
+ *
+ * @length: len of the command buffer read by the PSP
+ * @flags: processing flags
+ * @tmr_address: system physical address used for SEV-ES
+ * @tmr_len: len of tmr_address
+ * @nv_address: system physical address used for PSP NV storage
+ * @nv_len: len of nv_address
+ */
+struct sev_data_init_ex {
+	u32 length;                     /* In */
+	u32 flags;                      /* In */
+	u64 tmr_address;                /* In */
+	u32 tmr_len;                    /* In */
+	u32 reserved;                   /* In */
+	u64 nv_address;                 /* In/Out */
+	u32 nv_len;                     /* In */
+} __packed;
+
 #define SEV_INIT_FLAGS_SEV_ES	0x01
 
 /**
-- 
cgit v1.2.3


From 244d22ffd656bc8e5c49a2cd2fdfeb0e52a9730f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 10 Dec 2021 16:30:09 +0200
Subject: crypto: api - Replace kernel.h with the necessary inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/aead.h        | 4 +++-
 include/crypto/algapi.h      | 5 ++++-
 include/crypto/blake2b.h     | 1 -
 include/crypto/blake2s.h     | 2 +-
 include/crypto/cryptd.h      | 3 ++-
 include/crypto/engine.h      | 6 +++++-
 include/crypto/pcrypt.h      | 2 +-
 include/crypto/scatterwalk.h | 3 ++-
 include/crypto/skcipher.h    | 6 +++++-
 9 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/crypto/aead.h b/include/crypto/aead.h
index 5af914c1ab8e..14db3bee0519 100644
--- a/include/crypto/aead.h
+++ b/include/crypto/aead.h
@@ -8,9 +8,10 @@
 #ifndef _CRYPTO_AEAD_H
 #define _CRYPTO_AEAD_H
 
+#include <linux/container_of.h>
 #include <linux/crypto.h>
-#include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/types.h>
 
 /**
  * DOC: Authenticated Encryption With Associated Data (AEAD) Cipher API
@@ -73,6 +74,7 @@
  */
 
 struct crypto_aead;
+struct scatterlist;
 
 /**
  *	struct aead_request - AEAD request
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 5f6841c73e5a..f76ec723ceae 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -7,9 +7,11 @@
 #ifndef _CRYPTO_ALGAPI_H
 #define _CRYPTO_ALGAPI_H
 
+#include <linux/align.h>
 #include <linux/crypto.h>
+#include <linux/kconfig.h>
 #include <linux/list.h>
-#include <linux/kernel.h>
+#include <linux/types.h>
 
 /*
  * Maximum values for blocksize and alignmask, used to allocate
@@ -24,6 +26,7 @@
 struct crypto_aead;
 struct crypto_instance;
 struct module;
+struct notifier_block;
 struct rtattr;
 struct seq_file;
 struct sk_buff;
diff --git a/include/crypto/blake2b.h b/include/crypto/blake2b.h
index 18875f16f8ca..0c0176285349 100644
--- a/include/crypto/blake2b.h
+++ b/include/crypto/blake2b.h
@@ -5,7 +5,6 @@
 
 #include <linux/bug.h>
 #include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/string.h>
 
 enum blake2b_lengths {
diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h
index bc3fb59442ce..df3c6c2f9553 100644
--- a/include/crypto/blake2s.h
+++ b/include/crypto/blake2s.h
@@ -7,8 +7,8 @@
 #define _CRYPTO_BLAKE2S_H
 
 #include <linux/bug.h>
+#include <linux/kconfig.h>
 #include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/string.h>
 
 enum blake2s_lengths {
diff --git a/include/crypto/cryptd.h b/include/crypto/cryptd.h
index 23169f4d87e6..796d986e58e1 100644
--- a/include/crypto/cryptd.h
+++ b/include/crypto/cryptd.h
@@ -13,7 +13,8 @@
 #ifndef _CRYPTO_CRYPT_H
 #define _CRYPTO_CRYPT_H
 
-#include <linux/kernel.h>
+#include <linux/types.h>
+
 #include <crypto/aead.h>
 #include <crypto/hash.h>
 #include <crypto/skcipher.h>
diff --git a/include/crypto/engine.h b/include/crypto/engine.h
index fd4f2fa23f51..ae133e98d813 100644
--- a/include/crypto/engine.h
+++ b/include/crypto/engine.h
@@ -9,8 +9,10 @@
 
 #include <linux/crypto.h>
 #include <linux/list.h>
-#include <linux/kernel.h>
 #include <linux/kthread.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
 #include <crypto/algapi.h>
 #include <crypto/aead.h>
 #include <crypto/akcipher.h>
@@ -18,6 +20,8 @@
 #include <crypto/skcipher.h>
 #include <crypto/kpp.h>
 
+struct device;
+
 #define ENGINE_NAME_LEN	30
 /*
  * struct crypto_engine - crypto hardware engine
diff --git a/include/crypto/pcrypt.h b/include/crypto/pcrypt.h
index b9bc3436196a..234d7cf3cf5e 100644
--- a/include/crypto/pcrypt.h
+++ b/include/crypto/pcrypt.h
@@ -9,8 +9,8 @@
 #ifndef _CRYPTO_PCRYPT_H
 #define _CRYPTO_PCRYPT_H
 
+#include <linux/container_of.h>
 #include <linux/crypto.h>
-#include <linux/kernel.h>
 #include <linux/padata.h>
 
 struct pcrypt_request {
diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h
index 7af08174a721..6407b4b61350 100644
--- a/include/crypto/scatterwalk.h
+++ b/include/crypto/scatterwalk.h
@@ -12,8 +12,9 @@
 #define _CRYPTO_SCATTERWALK_H
 
 #include <crypto/algapi.h>
+
 #include <linux/highmem.h>
-#include <linux/kernel.h>
+#include <linux/mm.h>
 #include <linux/scatterlist.h>
 
 static inline void scatterwalk_crypto_chain(struct scatterlist *head,
diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h
index ef0fc9ed4342..39f5b67c3069 100644
--- a/include/crypto/skcipher.h
+++ b/include/crypto/skcipher.h
@@ -8,9 +8,13 @@
 #ifndef _CRYPTO_SKCIPHER_H
 #define _CRYPTO_SKCIPHER_H
 
+#include <linux/container_of.h>
 #include <linux/crypto.h>
-#include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+struct scatterlist;
 
 /**
  *	struct skcipher_request - Symmetric key cipher request
-- 
cgit v1.2.3


From 9dfa5b6f5efb85efe69fd3b7b0b912004d9547f1 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Thu, 16 Dec 2021 09:17:03 +0800
Subject: iommu/vt-d: Remove unused macros

These macros has no reference in the tree anymore. Cleanup them.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211216011703.763331-1-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-svm.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index 57cceecbe37f..1b73bab7eeff 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -8,12 +8,6 @@
 #ifndef __INTEL_SVM_H__
 #define __INTEL_SVM_H__
 
-/* Values for rxwp in fault_cb callback */
-#define SVM_REQ_READ	(1<<3)
-#define SVM_REQ_WRITE	(1<<2)
-#define SVM_REQ_EXEC	(1<<1)
-#define SVM_REQ_PRIV	(1<<0)
-
 /* Page Request Queue depth */
 #define PRQ_ORDER	2
 #define PRQ_RING_MASK	((0x1000 << PRQ_ORDER) - 0x20)
-- 
cgit v1.2.3


From b62e3317b68d9c84301940ca8ca9c35a584111b2 Mon Sep 17 00:00:00 2001
From: Xiang wangx <wangxiang@cdjrlc.com>
Date: Thu, 16 Dec 2021 23:19:16 +0800
Subject: net: fix typo in a comment

The double 'as' in a comment is repeated, thus it should be removed.

Signed-off-by: Xiang wangx <wangxiang@cdjrlc.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index be5cb3360b94..6aadcc0ecb5b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1937,7 +1937,7 @@ enum netdev_ml_priv_type {
  *	@udp_tunnel_nic:	UDP tunnel offload state
  *	@xdp_state:		stores info on attached XDP BPF programs
  *
- *	@nested_level:	Used as as a parameter of spin_lock_nested() of
+ *	@nested_level:	Used as a parameter of spin_lock_nested() of
  *			dev->addr_list_lock.
  *	@unlink_list:	As netif_addr_lock() can be called recursively,
  *			keep a list of interfaces to be deleted.
-- 
cgit v1.2.3


From 60ded273e4c047aec364f70195aced71a4082f90 Mon Sep 17 00:00:00 2001
From: Karol Trzcinski <karolx.trzcinski@linux.intel.com>
Date: Thu, 16 Dec 2021 17:24:22 -0600
Subject: ipc: debug: Add shared memory heap to memory scan

Newly added shared heap zones should be taken into account during
memory usage scanning.

Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Reviewed-by: Liam Girdwood <liam.r.girdwood@intel.com>
Signed-off-by: Karol Trzcinski <karolx.trzcinski@linux.intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20211216232422.345164-4-pierre-louis.bossart@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sof/debug.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/sound/sof/debug.h b/include/sound/sof/debug.h
index 3ecb5793789d..38693e3fb514 100644
--- a/include/sound/sof/debug.h
+++ b/include/sound/sof/debug.h
@@ -19,6 +19,8 @@ enum sof_ipc_dbg_mem_zone {
 	SOF_IPC_MEM_ZONE_SYS_RUNTIME	= 1,	/**< System-runtime zone */
 	SOF_IPC_MEM_ZONE_RUNTIME	= 2,	/**< Runtime zone */
 	SOF_IPC_MEM_ZONE_BUFFER		= 3,	/**< Buffer zone */
+	SOF_IPC_MEM_ZONE_RUNTIME_SHARED	= 4,	/**< System runtime zone */
+	SOF_IPC_MEM_ZONE_SYS_SHARED	= 5,	/**< System shared zone */
 };
 
 /** ABI3.18 */
-- 
cgit v1.2.3


From dd61b29207ca4f346fbd9c06bc49f093e3369185 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <brgl@bgdev.pl>
Date: Tue, 7 Dec 2021 10:34:06 +0100
Subject: gpiolib: provide gpiod_remove_hogs()

Currently all users of gpiod_add_hogs() call it only once at system
init so there never was any need for a mechanism allowing to remove
them. Now the upcoming gpio-sim will need to tear down chips with hogged
lines so provide a function that allows to remove hogs.

Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/machine.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h
index d755e529c1e3..2647dd10b541 100644
--- a/include/linux/gpio/machine.h
+++ b/include/linux/gpio/machine.h
@@ -100,6 +100,7 @@ void gpiod_add_lookup_table(struct gpiod_lookup_table *table);
 void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n);
 void gpiod_remove_lookup_table(struct gpiod_lookup_table *table);
 void gpiod_add_hogs(struct gpiod_hog *hogs);
+void gpiod_remove_hogs(struct gpiod_hog *hogs);
 #else /* ! CONFIG_GPIOLIB */
 static inline
 void gpiod_add_lookup_table(struct gpiod_lookup_table *table) {}
@@ -108,6 +109,7 @@ void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n) {}
 static inline
 void gpiod_remove_lookup_table(struct gpiod_lookup_table *table) {}
 static inline void gpiod_add_hogs(struct gpiod_hog *hogs) {}
+static inline void gpiod_remove_hogs(struct gpiod_hog *hogs) {}
 #endif /* CONFIG_GPIOLIB */
 
 #endif /* __LINUX_GPIO_MACHINE_H */
-- 
cgit v1.2.3


From 990f6756bb64756d2d1033118cded6333b43397d Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <brgl@bgdev.pl>
Date: Mon, 13 Dec 2021 11:16:41 +0100
Subject: gpiolib: allow to specify the firmware node in struct gpio_chip

Software nodes allow us to represent hierarchies for device components
that don't have their struct device representation yet - for instance:
banks of GPIOs under a common GPIO expander. The core gpiolib core
however doesn't offer any way of passing this information from the
drivers.

This extends struct gpio_chip with a pointer to fwnode that can be set
by the driver and used to pass device properties for child nodes.

This is similar to how we handle device-tree sub-nodes with
CONFIG_OF_GPIO enabled.

Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/driver.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index a673a359e20b..b0728c8ad90c 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -289,6 +289,7 @@ struct gpio_irq_chip {
  *	number or the name of the SoC IP-block implementing it.
  * @gpiodev: the internal state holder, opaque struct
  * @parent: optional parent device providing the GPIOs
+ * @fwnode: optional fwnode providing this controller's properties
  * @owner: helps prevent removal of modules exporting active GPIOs
  * @request: optional hook for chip-specific activation, such as
  *	enabling module power and clock; may sleep
@@ -377,6 +378,7 @@ struct gpio_chip {
 	const char		*label;
 	struct gpio_device	*gpiodev;
 	struct device		*parent;
+	struct fwnode_handle	*fwnode;
 	struct module		*owner;
 
 	int			(*request)(struct gpio_chip *gc,
-- 
cgit v1.2.3


From a8b10f3d12cfc168a389f3d97e1f762732d4d849 Mon Sep 17 00:00:00 2001
From: Prathamesh Shete <pshete@nvidia.com>
Date: Fri, 10 Dec 2021 17:02:03 +0100
Subject: dt-bindings: gpio: Add Tegra234 support

Extend the existing Tegra186 GPIO controller device tree bindings with
support for the GPIO controller found on Tegra234. The number of pins is
slightly different, but the programming model remains the same.

Signed-off-by: Prathamesh Shete <pshete@nvidia.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
[treding@nvidia.com: update device tree bindings]
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
---
 include/dt-bindings/gpio/tegra234-gpio.h | 63 ++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 include/dt-bindings/gpio/tegra234-gpio.h

(limited to 'include')

diff --git a/include/dt-bindings/gpio/tegra234-gpio.h b/include/dt-bindings/gpio/tegra234-gpio.h
new file mode 100644
index 000000000000..d7a1f2e298e8
--- /dev/null
+++ b/include/dt-bindings/gpio/tegra234-gpio.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. */
+
+/*
+ * This header provides constants for binding nvidia,tegra234-gpio*.
+ *
+ * The first cell in Tegra's GPIO specifier is the GPIO ID. The macros below
+ * provide names for this.
+ *
+ * The second cell contains standard flag values specified in gpio.h.
+ */
+
+#ifndef _DT_BINDINGS_GPIO_TEGRA234_GPIO_H
+#define _DT_BINDINGS_GPIO_TEGRA234_GPIO_H
+
+#include <dt-bindings/gpio/gpio.h>
+
+/* GPIOs implemented by main GPIO controller */
+#define TEGRA234_MAIN_GPIO_PORT_A   0
+#define TEGRA234_MAIN_GPIO_PORT_B   1
+#define TEGRA234_MAIN_GPIO_PORT_C   2
+#define TEGRA234_MAIN_GPIO_PORT_D   3
+#define TEGRA234_MAIN_GPIO_PORT_E   4
+#define TEGRA234_MAIN_GPIO_PORT_F   5
+#define TEGRA234_MAIN_GPIO_PORT_G   6
+#define TEGRA234_MAIN_GPIO_PORT_H   7
+#define TEGRA234_MAIN_GPIO_PORT_I   8
+#define TEGRA234_MAIN_GPIO_PORT_J   9
+#define TEGRA234_MAIN_GPIO_PORT_K  10
+#define TEGRA234_MAIN_GPIO_PORT_L  11
+#define TEGRA234_MAIN_GPIO_PORT_M  12
+#define TEGRA234_MAIN_GPIO_PORT_N  13
+#define TEGRA234_MAIN_GPIO_PORT_P  14
+#define TEGRA234_MAIN_GPIO_PORT_Q  15
+#define TEGRA234_MAIN_GPIO_PORT_R  16
+#define TEGRA234_MAIN_GPIO_PORT_S  17
+#define TEGRA234_MAIN_GPIO_PORT_T  18
+#define TEGRA234_MAIN_GPIO_PORT_U  19
+#define TEGRA234_MAIN_GPIO_PORT_V  20
+#define TEGRA234_MAIN_GPIO_PORT_X  21
+#define TEGRA234_MAIN_GPIO_PORT_Y  22
+#define TEGRA234_MAIN_GPIO_PORT_Z  23
+#define TEGRA234_MAIN_GPIO_PORT_AC 24
+#define TEGRA234_MAIN_GPIO_PORT_AD 25
+#define TEGRA234_MAIN_GPIO_PORT_AE 26
+#define TEGRA234_MAIN_GPIO_PORT_AF 27
+#define TEGRA234_MAIN_GPIO_PORT_AG 28
+
+#define TEGRA234_MAIN_GPIO(port, offset) \
+	((TEGRA234_MAIN_GPIO_PORT_##port * 8) + offset)
+
+/* GPIOs implemented by AON GPIO controller */
+#define TEGRA234_AON_GPIO_PORT_AA 0
+#define TEGRA234_AON_GPIO_PORT_BB 1
+#define TEGRA234_AON_GPIO_PORT_CC 2
+#define TEGRA234_AON_GPIO_PORT_DD 3
+#define TEGRA234_AON_GPIO_PORT_EE 4
+#define TEGRA234_AON_GPIO_PORT_GG 5
+
+#define TEGRA234_AON_GPIO(port, offset) \
+	((TEGRA234_AON_GPIO_PORT_##port * 8) + offset)
+
+#endif
-- 
cgit v1.2.3


From 733e417518a69b71061c3bafc2bf106109565eee Mon Sep 17 00:00:00 2001
From: Wasin Thonkaew <wasin@wasin.io>
Date: Wed, 3 Nov 2021 19:42:08 +0000
Subject: asm-generic/error-injection.h: fix a spelling mistake, and a coding
 style issue

Fix a spelling mistake "ganerating" -> "generating".
Remove trailing semicolon for a macro ALLOW_ERROR_INJECTION to fix a
coding style issue.

Signed-off-by: Wasin Thonkaew <wasin@wasin.io>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/error-injection.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/error-injection.h b/include/asm-generic/error-injection.h
index 7ddd9dc10ce9..fbca56bd9cbc 100644
--- a/include/asm-generic/error-injection.h
+++ b/include/asm-generic/error-injection.h
@@ -20,7 +20,7 @@ struct pt_regs;
 
 #ifdef CONFIG_FUNCTION_ERROR_INJECTION
 /*
- * Whitelist ganerating macro. Specify functions which can be
+ * Whitelist generating macro. Specify functions which can be
  * error-injectable using this macro.
  */
 #define ALLOW_ERROR_INJECTION(fname, _etype)				\
@@ -29,7 +29,7 @@ static struct error_injection_entry __used				\
 	_eil_addr_##fname = {						\
 		.addr = (unsigned long)fname,				\
 		.etype = EI_ETYPE_##_etype,				\
-	};
+	}
 
 void override_function_with_return(struct pt_regs *regs);
 #else
-- 
cgit v1.2.3


From c06ef740d401d0f4ab188882bf6f8d9cf0f75eaf Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Tue, 7 Dec 2021 00:20:59 +0000
Subject: PM: core: Redefine pm_ptr() macro

The pm_ptr() macro was previously conditionally defined, according to
the value of the CONFIG_PM option. This meant that the pointed structure
was either referenced (if CONFIG_PM was set), or never referenced (if
CONFIG_PM was not set), causing it to be detected as unused by the
compiler.

This worked fine, but required the __maybe_unused compiler attribute to
be used to every symbol pointed to by a pointer wrapped with pm_ptr().

We can do better. With this change, the pm_ptr() is now defined the
same, independently of the value of CONFIG_PM. It now uses the (?:)
ternary operator to conditionally resolve to its argument. Since the
condition is known at compile time, the compiler will then choose to
discard the unused symbols, which won't need to be tagged with
__maybe_unused anymore.

This pm_ptr() macro is usually used with pointers to dev_pm_ops
structures created with SIMPLE_DEV_PM_OPS() or similar macros. These do
use a __maybe_unused flag, which is now useless with this change, so it
later can be removed. However in the meantime it causes no harm, and all
the drivers still compile fine with the new pm_ptr() macro.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 1d8209c09686..b88ac7dcf2a2 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -373,11 +373,7 @@ const struct dev_pm_ops __maybe_unused name = { \
 	SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
 }
 
-#ifdef CONFIG_PM
-#define pm_ptr(_ptr) (_ptr)
-#else
-#define pm_ptr(_ptr) NULL
-#endif
+#define pm_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM), (_ptr))
 
 /*
  * PM_EVENT_ messages
-- 
cgit v1.2.3


From 1a3c7bb088266fa2db017be299f91f1c1894c857 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Tue, 7 Dec 2021 00:21:00 +0000
Subject: PM: core: Add new *_PM_OPS macros, deprecate old ones

This commit introduces the following macros:

SYSTEM_SLEEP_PM_OPS()
LATE_SYSTEM_SLEEP_PM_OPS()
NOIRQ_SYSTEM_SLEEP_PM_OPS()
RUNTIME_PM_OPS()

These new macros are very similar to their SET_*_PM_OPS() equivalent.
They however differ in the fact that the callbacks they set will always
be seen as referenced by the compiler. This means that the callback
functions don't need to be wrapped with a #ifdef CONFIG_PM guard, or
tagged with __maybe_unused, to prevent the compiler from complaining
about unused static symbols. The compiler will then simply evaluate at
compile time whether or not these symbols are dead code.

The callbacks that are only useful with CONFIG_PM_SLEEP is enabled, are
now also wrapped with a new pm_sleep_ptr() macro, which is inspired from
pm_ptr(). This is needed for drivers that use different callbacks for
sleep and runtime PM, to handle the case where CONFIG_PM is set and
CONFIG_PM_SLEEP is not.

This commit also deprecates the following macros:

SIMPLE_DEV_PM_OPS()
UNIVERSAL_DEV_PM_OPS()

And introduces the following macros:

DEFINE_SIMPLE_DEV_PM_OPS()
DEFINE_UNIVERSAL_DEV_PM_OPS()

These macros are similar to the functions they were created to replace,
with the following differences:

 - They use the new macros introduced above, and as such always
   reference the provided callback functions.

 - They are not tagged with __maybe_unused. They are meant to be used
   with pm_ptr() or pm_sleep_ptr() for DEFINE_UNIVERSAL_DEV_PM_OPS()
   and DEFINE_SIMPLE_DEV_PM_OPS() respectively.

 - They declare the symbol static, since every driver seems to do that
   anyway; and if a non-static use-case is needed an indirection pointer
   could be used.

The point of this change, is to progressively switch from a code model
where PM callbacks are all protected behind CONFIG_PM guards, to a code
model where the PM callbacks are always seen by the compiler, but
discarded if not used.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm.h | 74 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 50 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index b88ac7dcf2a2..fc9691cb01b4 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -300,47 +300,59 @@ struct dev_pm_ops {
 	int (*runtime_idle)(struct device *dev);
 };
 
+#define SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+	.suspend = pm_sleep_ptr(suspend_fn), \
+	.resume = pm_sleep_ptr(resume_fn), \
+	.freeze = pm_sleep_ptr(suspend_fn), \
+	.thaw = pm_sleep_ptr(resume_fn), \
+	.poweroff = pm_sleep_ptr(suspend_fn), \
+	.restore = pm_sleep_ptr(resume_fn),
+
+#define LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+	.suspend_late = pm_sleep_ptr(suspend_fn), \
+	.resume_early = pm_sleep_ptr(resume_fn), \
+	.freeze_late = pm_sleep_ptr(suspend_fn), \
+	.thaw_early = pm_sleep_ptr(resume_fn), \
+	.poweroff_late = pm_sleep_ptr(suspend_fn), \
+	.restore_early = pm_sleep_ptr(resume_fn),
+
+#define NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+	.suspend_noirq = pm_sleep_ptr(suspend_fn), \
+	.resume_noirq = pm_sleep_ptr(resume_fn), \
+	.freeze_noirq = pm_sleep_ptr(suspend_fn), \
+	.thaw_noirq = pm_sleep_ptr(resume_fn), \
+	.poweroff_noirq = pm_sleep_ptr(suspend_fn), \
+	.restore_noirq = pm_sleep_ptr(resume_fn),
+
+#define RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
+	.runtime_suspend = suspend_fn, \
+	.runtime_resume = resume_fn, \
+	.runtime_idle = idle_fn,
+
 #ifdef CONFIG_PM_SLEEP
 #define SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
-	.suspend = suspend_fn, \
-	.resume = resume_fn, \
-	.freeze = suspend_fn, \
-	.thaw = resume_fn, \
-	.poweroff = suspend_fn, \
-	.restore = resume_fn,
+	SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
 #else
 #define SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
 #endif
 
 #ifdef CONFIG_PM_SLEEP
 #define SET_LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
-	.suspend_late = suspend_fn, \
-	.resume_early = resume_fn, \
-	.freeze_late = suspend_fn, \
-	.thaw_early = resume_fn, \
-	.poweroff_late = suspend_fn, \
-	.restore_early = resume_fn,
+	LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
 #else
 #define SET_LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
 #endif
 
 #ifdef CONFIG_PM_SLEEP
 #define SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
-	.suspend_noirq = suspend_fn, \
-	.resume_noirq = resume_fn, \
-	.freeze_noirq = suspend_fn, \
-	.thaw_noirq = resume_fn, \
-	.poweroff_noirq = suspend_fn, \
-	.restore_noirq = resume_fn,
+	NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
 #else
 #define SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
 #endif
 
 #ifdef CONFIG_PM
 #define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
-	.runtime_suspend = suspend_fn, \
-	.runtime_resume = resume_fn, \
-	.runtime_idle = idle_fn,
+	RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn)
 #else
 #define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn)
 #endif
@@ -349,9 +361,9 @@ struct dev_pm_ops {
  * Use this if you want to use the same suspend and resume callbacks for suspend
  * to RAM and hibernation.
  */
-#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
-const struct dev_pm_ops __maybe_unused name = { \
-	SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+#define DEFINE_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
+static const struct dev_pm_ops name = { \
+	SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
 }
 
 /*
@@ -367,6 +379,19 @@ const struct dev_pm_ops __maybe_unused name = { \
  * .resume_early(), to the same routines as .runtime_suspend() and
  * .runtime_resume(), respectively (and analogously for hibernation).
  */
+#define DEFINE_UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
+static const struct dev_pm_ops name = { \
+	SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+	RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
+}
+
+/* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */
+#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
+const struct dev_pm_ops __maybe_unused name = { \
+	SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+}
+
+/* Deprecated. Use DEFINE_UNIVERSAL_DEV_PM_OPS() instead. */
 #define UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
 const struct dev_pm_ops __maybe_unused name = { \
 	SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
@@ -374,6 +399,7 @@ const struct dev_pm_ops __maybe_unused name = { \
 }
 
 #define pm_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM), (_ptr))
+#define pm_sleep_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM_SLEEP), (_ptr))
 
 /*
  * PM_EVENT_ messages
-- 
cgit v1.2.3


From 931da6a0de5d620425af4425344259e6ff46b654 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Tue, 7 Dec 2021 21:17:34 +0800
Subject: powercap: intel_rapl: support new layout of Psys PowerLimit Register
 on SPR

On Sapphire Rapids, the layout of the Psys domain Power Limit Register
is different from from what it was before.

Enhance the code to support the new Psys PL register layout.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Reported-and-tested-by: Alkattan Dana <dana.alkattan@intel.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/intel_rapl.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h
index 93780834fc8f..9f4b6f5b822f 100644
--- a/include/linux/intel_rapl.h
+++ b/include/linux/intel_rapl.h
@@ -58,6 +58,12 @@ enum rapl_primitives {
 	THROTTLED_TIME,
 	PRIORITY_LEVEL,
 
+	PSYS_POWER_LIMIT1,
+	PSYS_POWER_LIMIT2,
+	PSYS_PL1_ENABLE,
+	PSYS_PL2_ENABLE,
+	PSYS_TIME_WINDOW1,
+	PSYS_TIME_WINDOW2,
 	/* below are not raw primitive data */
 	AVERAGE_POWER,
 	NR_RAPL_PRIMITIVES,
-- 
cgit v1.2.3


From c24efa6732788f0be22cdf5d2aedd5e3117e983f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 7 Dec 2021 19:54:32 +0100
Subject: PM: runtime: Capture device status before disabling runtime PM

In some cases (for example, during system-wide suspend and resume of
devices) it is useful to know whether or not runtime PM has ever been
enabled for a given device and, if so, what the runtime PM status of
it had been right before runtime PM was disabled for it last time.

For this reason, introduce a new struct dev_pm_info field called
last_status that will be used for capturing the runtime PM status of
the device when its power.disable_depth counter changes from 0 to 1.

The new field will be set to RPM_INVALID to start with and whenever
power.disable_depth changes from 1 to 0, so it will be valid only
when runtime PM of the device is currently disabled, but it has been
enabled at least once.

Immediately use power.last_status in rpm_resume() to make it handle
the case when PM runtime is disabled for the device, but its runtime
PM status is RPM_ACTIVE more consistently.  Namely, make it return 1
if power.last_status is also equal to RPM_ACTIVE in that case (the
idea being that if the status was RPM_ACTIVE last time when
power.disable_depth was changing from 0 to 1 and it is still
RPM_ACTIVE, it can be assumed to reflect what happened to the device
last time when it was using runtime PM) and -EACCES otherwise.

Update the documentation to provide a description of last_status and
change the description of pm_runtime_resume() in it to reflect the
new behavior of rpm_active().

While at it, rearrange the code in pm_runtime_enable() to be more
straightforward and replace the WARN() macro in it with a pr_warn()
invocation which is less disruptive.

Link: https://lore.kernel.org/linux-pm/20211026222626.39222-1-ulf.hansson@linaro.org/t/#u
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index fc9691cb01b4..e1e9402180b9 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -521,6 +521,7 @@ const struct dev_pm_ops __maybe_unused name = { \
  */
 
 enum rpm_status {
+	RPM_INVALID = -1,
 	RPM_ACTIVE = 0,
 	RPM_RESUMING,
 	RPM_SUSPENDED,
@@ -634,6 +635,7 @@ struct dev_pm_info {
 	unsigned int		links_count;
 	enum rpm_request	request;
 	enum rpm_status		runtime_status;
+	enum rpm_status		last_status;
 	int			runtime_error;
 	int			autosuspend_delay;
 	u64			last_busy;
-- 
cgit v1.2.3


From d1579e61192e0e686faa4208500ef4c3b529b16c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 10 Dec 2021 17:10:13 +0100
Subject: PM: runtime: Add safety net to supplier device release

Because refcount_dec_not_one() returns true if the target refcount
becomes saturated, it is generally unsafe to use its return value as
a loop termination condition, but that is what happens when a device
link's supplier device is released during runtime PM suspend
operations and on device link removal.

To address this, introduce pm_runtime_release_supplier() to be used
in the above cases which will check the supplier device's runtime
PM usage counter in addition to the refcount_dec_not_one() return
value, so the loop can be terminated in case the rpm_active refcount
value becomes invalid, and update the code in question to use it as
appropriate.

This change is not expected to have any visible functional impact.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/pm_runtime.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index eddd66d426ca..016de5776b6d 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -58,6 +58,7 @@ extern void pm_runtime_get_suppliers(struct device *dev);
 extern void pm_runtime_put_suppliers(struct device *dev);
 extern void pm_runtime_new_link(struct device *dev);
 extern void pm_runtime_drop_link(struct device_link *link);
+extern void pm_runtime_release_supplier(struct device_link *link, bool check_idle);
 
 extern int devm_pm_runtime_enable(struct device *dev);
 
@@ -283,6 +284,8 @@ static inline void pm_runtime_get_suppliers(struct device *dev) {}
 static inline void pm_runtime_put_suppliers(struct device *dev) {}
 static inline void pm_runtime_new_link(struct device *dev) {}
 static inline void pm_runtime_drop_link(struct device_link *link) {}
+static inline void pm_runtime_release_supplier(struct device_link *link,
+					       bool check_idle) {}
 
 #endif /* !CONFIG_PM */
 
-- 
cgit v1.2.3


From 227fee5fc99eeb74d43bf68832f6d59d30ac07d8 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Thu, 16 Dec 2021 13:42:25 +0530
Subject: bus: mhi: core: Add an API for auto queueing buffers for DL channel

Add a new API "mhi_prepare_for_transfer_autoqueue" for using with client
drivers like QRTR to request MHI core to autoqueue buffers for the DL
channel along with starting both UL and DL channels.

So far, the "auto_queue" flag specified by the controller drivers in
channel definition served this purpose but this will be removed at some
point in future.

Cc: netdev@vger.kernel.org
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Co-developed-by: Loic Poulain <loic.poulain@linaro.org>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Link: https://lore.kernel.org/r/20211216081227.237749-9-manivannan.sadhasivam@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/mhi.h | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/mhi.h b/include/linux/mhi.h
index a5cc4cdf9cc8..a5441ad33c74 100644
--- a/include/linux/mhi.h
+++ b/include/linux/mhi.h
@@ -730,15 +730,26 @@ void mhi_device_put(struct mhi_device *mhi_dev);
 
 /**
  * mhi_prepare_for_transfer - Setup UL and DL channels for data transfer.
- *                            Allocate and initialize the channel context and
- *                            also issue the START channel command to both
- *                            channels. Channels can be started only if both
- *                            host and device execution environments match and
- *                            channels are in a DISABLED state.
  * @mhi_dev: Device associated with the channels
+ *
+ * Allocate and initialize the channel context and also issue the START channel
+ * command to both channels. Channels can be started only if both host and
+ * device execution environments match and channels are in a DISABLED state.
  */
 int mhi_prepare_for_transfer(struct mhi_device *mhi_dev);
 
+/**
+ * mhi_prepare_for_transfer_autoqueue - Setup UL and DL channels with auto queue
+ *                                      buffers for DL traffic
+ * @mhi_dev: Device associated with the channels
+ *
+ * Allocate and initialize the channel context and also issue the START channel
+ * command to both channels. Channels can be started only if both host and
+ * device execution environments match and channels are in a DISABLED state.
+ * The MHI core will automatically allocate and queue buffers for the DL traffic.
+ */
+int mhi_prepare_for_transfer_autoqueue(struct mhi_device *mhi_dev);
+
 /**
  * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer.
  *                               Issue the RESET channel command and let the
-- 
cgit v1.2.3


From 49f39cb0ef198ae3c73765c9b9ee3034e4c9f076 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Wed, 1 Dec 2021 14:59:30 +0200
Subject: device property: Fix documentation for FWNODE_GRAPH_DEVICE_DISABLED

FWNODE_GRAPH_DEVICE_DISABLED flag was meant for also returning endpoints
connected to disabled devices, but it also may return endpoints that are
not connected. Fix this in documentation. Also
fwnode_graph_get_endpoint_by_id() was affeced by this.

Also improve the language a little bit.

Fixes: 0fcc2bdc8aff ("device property: Add fwnode_graph_get_endpoint_by_id()")
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/property.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/property.h b/include/linux/property.h
index 16f736c698a2..7a2df45ec3ae 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -414,7 +414,8 @@ static inline bool fwnode_graph_is_endpoint(struct fwnode_handle *fwnode)
  *				one.
  * @FWNODE_GRAPH_DEVICE_DISABLED: That the device to which the remote
  *				  endpoint of the given endpoint belongs to,
- *				  may be disabled.
+ *				  may be disabled, or that the endpoint is not
+ *				  connected.
  */
 #define FWNODE_GRAPH_ENDPOINT_NEXT	BIT(0)
 #define FWNODE_GRAPH_DEVICE_DISABLED	BIT(1)
-- 
cgit v1.2.3


From c87b8fc569667610b4891cad1e4a663e5a94d8f8 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Wed, 1 Dec 2021 14:59:33 +0200
Subject: device property: Implement fwnode_graph_get_endpoint_count()

Add fwnode_graph_get_endpoint_count() function to provide generic
implementation of of_graph_get_endpoint_count(). The former by default
only counts endpoints to available devices which is consistent with the
rest of the fwnode graph API. By providing FWNODE_GRAPH_DEVICE_DISABLED
flag, also unconnected endpoints and endpoints to disabled devices are
counted.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/property.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/property.h b/include/linux/property.h
index 7a2df45ec3ae..8c0104871252 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -423,6 +423,8 @@ static inline bool fwnode_graph_is_endpoint(struct fwnode_handle *fwnode)
 struct fwnode_handle *
 fwnode_graph_get_endpoint_by_id(const struct fwnode_handle *fwnode,
 				u32 port, u32 endpoint, unsigned long flags);
+unsigned int fwnode_graph_get_endpoint_count(struct fwnode_handle *fwnode,
+					     unsigned long flags);
 
 #define fwnode_graph_for_each_endpoint(fwnode, child)			\
 	for (child = NULL;						\
-- 
cgit v1.2.3


From c49eea6ffec626c059ace085fce1bf501b05dbc7 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Wed, 1 Dec 2021 15:01:15 +0200
Subject: device property: Drop fwnode_graph_get_remote_node()

fwnode_graph_get_remote_node() is only used by the tegra-video driver.
Convert it to use newer fwnode_graph_get_endpoint_by_id() and drop
now-unused fwnode_graph_get_remote_node().

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/property.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/property.h b/include/linux/property.h
index 8c0104871252..8355f99ebd47 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -397,9 +397,6 @@ struct fwnode_handle *fwnode_graph_get_remote_port(
 	const struct fwnode_handle *fwnode);
 struct fwnode_handle *fwnode_graph_get_remote_endpoint(
 	const struct fwnode_handle *fwnode);
-struct fwnode_handle *
-fwnode_graph_get_remote_node(const struct fwnode_handle *fwnode, u32 port,
-			     u32 endpoint);
 
 static inline bool fwnode_graph_is_endpoint(struct fwnode_handle *fwnode)
 {
-- 
cgit v1.2.3


From e3c963c498871e0d4b2eceb32e2b989493838ccc Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 3 Dec 2021 17:36:20 +0100
Subject: ACPI: scan: Introduce acpi_fetch_acpi_dev()

Introduce acpi_fetch_acpi_dev() as a more reasonable replacement for
acpi_bus_get_device() and modify the code in scan.c to use it instead
of the latter.

No expected functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
---
 include/acpi/acpi_bus.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 480f9207a4c6..17f700c9a4f9 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -505,6 +505,7 @@ extern int unregister_acpi_notifier(struct notifier_block *);
  */
 
 int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device);
+struct acpi_device *acpi_fetch_acpi_dev(acpi_handle handle);
 acpi_status acpi_bus_get_status_handle(acpi_handle handle,
 				       unsigned long long *sta);
 int acpi_bus_get_status(struct acpi_device *device);
-- 
cgit v1.2.3


From bd0b536dc2e1e9828a85b1e3470ee7bafc3b36f6 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Mon, 29 Nov 2021 16:15:59 -0800
Subject: virtchnl: Add support for new VLAN capabilities

Currently VIRTCHNL only allows for VLAN filtering and offloads to happen
on a single 802.1Q VLAN. Add support to filter and offload on inner,
outer, and/or inner + outer VLANs.

This is done by introducing the new capability
VIRTCHNL_VF_OFFLOAD_VLAN_V2. The flow to negotiate this new capability
is shown below.

1. VF - sets the VIRTCHNL_VF_OFFLOAD_VLAN_V2 bit in the
   virtchnl_vf_resource.vf_caps_flags during the
   VIRTCHNL_OP_GET_VF_RESOURCES request message. The VF should also set
   the VIRTCHNL_VF_OFFLOAD_VLAN bit in case the PF driver doesn't support
   the new capability.

2. PF - sets the VLAN capability bit it supports in the
   VIRTCHNL_OP_GET_VF_RESOURCES response message. This will either be
   VIRTCHNL_VF_OFFLOAD_VLAN_V2, VIRTCHNL_VF_OFFLOAD_VLAN, or none.

3. VF - If the VIRTCHNL_VF_OFFLOAD_VLAN_V2 capability was ACK'd by the
   PF, then the VF needs to request the VLAN capabilities of the
   PF/Device by issuing a VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS request.
   If the VIRTCHNL_VF_OFFLOAD_VLAN capability was ACK'd then the VF
   knows only single 802.1Q VLAN filtering/offloads are supported. If no
   VLAN capability is ACK'd then the PF/Device doesn't support hardware
   VLAN filtering/offloads for this VF.

4. PF - Populates the virtchnl_vlan_caps structure based on what it
   allows/supports for that VF and sends that response via
   VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS.

After VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS is successfully negotiated
the VF driver needs to interpret the capabilities supported by the
underlying PF/Device. The VF will be allowed to filter/offload the
inner 802.1Q, outer (various ethertype), inner 802.1Q + outer
(various ethertypes), or none based on which fields are set.

The VF will also need to interpret where the VLAN tag should be inserted
and/or stripped based on the negotiated capabilities.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/avf/virtchnl.h | 377 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 377 insertions(+)

(limited to 'include')

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index b30a1bc74fc7..2ce27e8e4f19 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -141,6 +141,13 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_DEL_RSS_CFG = 46,
 	VIRTCHNL_OP_ADD_FDIR_FILTER = 47,
 	VIRTCHNL_OP_DEL_FDIR_FILTER = 48,
+	VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS = 51,
+	VIRTCHNL_OP_ADD_VLAN_V2 = 52,
+	VIRTCHNL_OP_DEL_VLAN_V2 = 53,
+	VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 = 54,
+	VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 = 55,
+	VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 = 56,
+	VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57,
 	VIRTCHNL_OP_MAX,
 };
 
@@ -246,6 +253,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES		BIT(6)
 /* used to negotiate communicating link speeds in Mbps */
 #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED		BIT(7)
+#define VIRTCHNL_VF_OFFLOAD_VLAN_V2		BIT(15)
 #define VIRTCHNL_VF_OFFLOAD_VLAN		BIT(16)
 #define VIRTCHNL_VF_OFFLOAD_RX_POLLING		BIT(17)
 #define VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2	BIT(18)
@@ -475,6 +483,351 @@ struct virtchnl_vlan_filter_list {
 
 VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_vlan_filter_list);
 
+/* This enum is used for all of the VIRTCHNL_VF_OFFLOAD_VLAN_V2_CAPS related
+ * structures and opcodes.
+ *
+ * VIRTCHNL_VLAN_UNSUPPORTED - This field is not supported and if a VF driver
+ * populates it the PF should return VIRTCHNL_STATUS_ERR_NOT_SUPPORTED.
+ *
+ * VIRTCHNL_VLAN_ETHERTYPE_8100 - This field supports 0x8100 ethertype.
+ * VIRTCHNL_VLAN_ETHERTYPE_88A8 - This field supports 0x88A8 ethertype.
+ * VIRTCHNL_VLAN_ETHERTYPE_9100 - This field supports 0x9100 ethertype.
+ *
+ * VIRTCHNL_VLAN_ETHERTYPE_AND - Used when multiple ethertypes can be supported
+ * by the PF concurrently. For example, if the PF can support
+ * VIRTCHNL_VLAN_ETHERTYPE_8100 AND VIRTCHNL_VLAN_ETHERTYPE_88A8 filters it
+ * would OR the following bits:
+ *
+ *	VIRTHCNL_VLAN_ETHERTYPE_8100 |
+ *	VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+ *	VIRTCHNL_VLAN_ETHERTYPE_AND;
+ *
+ * The VF would interpret this as VLAN filtering can be supported on both 0x8100
+ * and 0x88A8 VLAN ethertypes.
+ *
+ * VIRTCHNL_ETHERTYPE_XOR - Used when only a single ethertype can be supported
+ * by the PF concurrently. For example if the PF can support
+ * VIRTCHNL_VLAN_ETHERTYPE_8100 XOR VIRTCHNL_VLAN_ETHERTYPE_88A8 stripping
+ * offload it would OR the following bits:
+ *
+ *	VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ *	VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+ *	VIRTCHNL_VLAN_ETHERTYPE_XOR;
+ *
+ * The VF would interpret this as VLAN stripping can be supported on either
+ * 0x8100 or 0x88a8 VLAN ethertypes. So when requesting VLAN stripping via
+ * VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 the specified ethertype will override
+ * the previously set value.
+ *
+ * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1 - Used to tell the VF to insert and/or
+ * strip the VLAN tag using the L2TAG1 field of the Tx/Rx descriptors.
+ *
+ * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2 - Used to tell the VF to insert hardware
+ * offloaded VLAN tags using the L2TAG2 field of the Tx descriptor.
+ *
+ * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2 - Used to tell the VF to strip hardware
+ * offloaded VLAN tags using the L2TAG2_2 field of the Rx descriptor.
+ *
+ * VIRTCHNL_VLAN_PRIO - This field supports VLAN priority bits. This is used for
+ * VLAN filtering if the underlying PF supports it.
+ *
+ * VIRTCHNL_VLAN_TOGGLE_ALLOWED - This field is used to say whether a
+ * certain VLAN capability can be toggled. For example if the underlying PF/CP
+ * allows the VF to toggle VLAN filtering, stripping, and/or insertion it should
+ * set this bit along with the supported ethertypes.
+ */
+enum virtchnl_vlan_support {
+	VIRTCHNL_VLAN_UNSUPPORTED =		0,
+	VIRTCHNL_VLAN_ETHERTYPE_8100 =		BIT(0),
+	VIRTCHNL_VLAN_ETHERTYPE_88A8 =		BIT(1),
+	VIRTCHNL_VLAN_ETHERTYPE_9100 =		BIT(2),
+	VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1 =	BIT(8),
+	VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2 =	BIT(9),
+	VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2 =	BIT(10),
+	VIRTCHNL_VLAN_PRIO =			BIT(24),
+	VIRTCHNL_VLAN_FILTER_MASK =		BIT(28),
+	VIRTCHNL_VLAN_ETHERTYPE_AND =		BIT(29),
+	VIRTCHNL_VLAN_ETHERTYPE_XOR =		BIT(30),
+	VIRTCHNL_VLAN_TOGGLE =			BIT(31),
+};
+
+/* This structure is used as part of the VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS
+ * for filtering, insertion, and stripping capabilities.
+ *
+ * If only outer capabilities are supported (for filtering, insertion, and/or
+ * stripping) then this refers to the outer most or single VLAN from the VF's
+ * perspective.
+ *
+ * If only inner capabilities are supported (for filtering, insertion, and/or
+ * stripping) then this refers to the outer most or single VLAN from the VF's
+ * perspective. Functionally this is the same as if only outer capabilities are
+ * supported. The VF driver is just forced to use the inner fields when
+ * adding/deleting filters and enabling/disabling offloads (if supported).
+ *
+ * If both outer and inner capabilities are supported (for filtering, insertion,
+ * and/or stripping) then outer refers to the outer most or single VLAN and
+ * inner refers to the second VLAN, if it exists, in the packet.
+ *
+ * There is no support for tunneled VLAN offloads, so outer or inner are never
+ * referring to a tunneled packet from the VF's perspective.
+ */
+struct virtchnl_vlan_supported_caps {
+	u32 outer;
+	u32 inner;
+};
+
+/* The PF populates these fields based on the supported VLAN filtering. If a
+ * field is VIRTCHNL_VLAN_UNSUPPORTED then it's not supported and the PF will
+ * reject any VIRTCHNL_OP_ADD_VLAN_V2 or VIRTCHNL_OP_DEL_VLAN_V2 messages using
+ * the unsupported fields.
+ *
+ * Also, a VF is only allowed to toggle its VLAN filtering setting if the
+ * VIRTCHNL_VLAN_TOGGLE bit is set.
+ *
+ * The ethertype(s) specified in the ethertype_init field are the ethertypes
+ * enabled for VLAN filtering. VLAN filtering in this case refers to the outer
+ * most VLAN from the VF's perspective. If both inner and outer filtering are
+ * allowed then ethertype_init only refers to the outer most VLAN as only
+ * VLAN ethertype supported for inner VLAN filtering is
+ * VIRTCHNL_VLAN_ETHERTYPE_8100. By default, inner VLAN filtering is disabled
+ * when both inner and outer filtering are allowed.
+ *
+ * The max_filters field tells the VF how many VLAN filters it's allowed to have
+ * at any one time. If it exceeds this amount and tries to add another filter,
+ * then the request will be rejected by the PF. To prevent failures, the VF
+ * should keep track of how many VLAN filters it has added and not attempt to
+ * add more than max_filters.
+ */
+struct virtchnl_vlan_filtering_caps {
+	struct virtchnl_vlan_supported_caps filtering_support;
+	u32 ethertype_init;
+	u16 max_filters;
+	u8 pad[2];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vlan_filtering_caps);
+
+/* This enum is used for the virtchnl_vlan_offload_caps structure to specify
+ * if the PF supports a different ethertype for stripping and insertion.
+ *
+ * VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION - The ethertype(s) specified
+ * for stripping affect the ethertype(s) specified for insertion and visa versa
+ * as well. If the VF tries to configure VLAN stripping via
+ * VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 with VIRTCHNL_VLAN_ETHERTYPE_8100 then
+ * that will be the ethertype for both stripping and insertion.
+ *
+ * VIRTCHNL_ETHERTYPE_MATCH_NOT_REQUIRED - The ethertype(s) specified for
+ * stripping do not affect the ethertype(s) specified for insertion and visa
+ * versa.
+ */
+enum virtchnl_vlan_ethertype_match {
+	VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION = 0,
+	VIRTCHNL_ETHERTYPE_MATCH_NOT_REQUIRED = 1,
+};
+
+/* The PF populates these fields based on the supported VLAN offloads. If a
+ * field is VIRTCHNL_VLAN_UNSUPPORTED then it's not supported and the PF will
+ * reject any VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 or
+ * VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 messages using the unsupported fields.
+ *
+ * Also, a VF is only allowed to toggle its VLAN offload setting if the
+ * VIRTCHNL_VLAN_TOGGLE_ALLOWED bit is set.
+ *
+ * The VF driver needs to be aware of how the tags are stripped by hardware and
+ * inserted by the VF driver based on the level of offload support. The PF will
+ * populate these fields based on where the VLAN tags are expected to be
+ * offloaded via the VIRTHCNL_VLAN_TAG_LOCATION_* bits. The VF will need to
+ * interpret these fields. See the definition of the
+ * VIRTCHNL_VLAN_TAG_LOCATION_* bits above the virtchnl_vlan_support
+ * enumeration.
+ */
+struct virtchnl_vlan_offload_caps {
+	struct virtchnl_vlan_supported_caps stripping_support;
+	struct virtchnl_vlan_supported_caps insertion_support;
+	u32 ethertype_init;
+	u8 ethertype_match;
+	u8 pad[3];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_vlan_offload_caps);
+
+/* VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS
+ * VF sends this message to determine its VLAN capabilities.
+ *
+ * PF will mark which capabilities it supports based on hardware support and
+ * current configuration. For example, if a port VLAN is configured the PF will
+ * not allow outer VLAN filtering, stripping, or insertion to be configured so
+ * it will block these features from the VF.
+ *
+ * The VF will need to cross reference its capabilities with the PFs
+ * capabilities in the response message from the PF to determine the VLAN
+ * support.
+ */
+struct virtchnl_vlan_caps {
+	struct virtchnl_vlan_filtering_caps filtering;
+	struct virtchnl_vlan_offload_caps offloads;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_vlan_caps);
+
+struct virtchnl_vlan {
+	u16 tci;	/* tci[15:13] = PCP and tci[11:0] = VID */
+	u16 tci_mask;	/* only valid if VIRTCHNL_VLAN_FILTER_MASK set in
+			 * filtering caps
+			 */
+	u16 tpid;	/* 0x8100, 0x88a8, etc. and only type(s) set in
+			 * filtering caps. Note that tpid here does not refer to
+			 * VIRTCHNL_VLAN_ETHERTYPE_*, but it refers to the
+			 * actual 2-byte VLAN TPID
+			 */
+	u8 pad[2];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_vlan);
+
+struct virtchnl_vlan_filter {
+	struct virtchnl_vlan inner;
+	struct virtchnl_vlan outer;
+	u8 pad[16];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(32, virtchnl_vlan_filter);
+
+/* VIRTCHNL_OP_ADD_VLAN_V2
+ * VIRTCHNL_OP_DEL_VLAN_V2
+ *
+ * VF sends these messages to add/del one or more VLAN tag filters for Rx
+ * traffic.
+ *
+ * The PF attempts to add the filters and returns status.
+ *
+ * The VF should only ever attempt to add/del virtchnl_vlan_filter(s) using the
+ * supported fields negotiated via VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS.
+ */
+struct virtchnl_vlan_filter_list_v2 {
+	u16 vport_id;
+	u16 num_elements;
+	u8 pad[4];
+	struct virtchnl_vlan_filter filters[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_vlan_filter_list_v2);
+
+/* VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2
+ * VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2
+ * VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2
+ * VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2
+ *
+ * VF sends this message to enable or disable VLAN stripping or insertion. It
+ * also needs to specify an ethertype. The VF knows which VLAN ethertypes are
+ * allowed and whether or not it's allowed to enable/disable the specific
+ * offload via the VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS message. The VF needs to
+ * parse the virtchnl_vlan_caps.offloads fields to determine which offload
+ * messages are allowed.
+ *
+ * For example, if the PF populates the virtchnl_vlan_caps.offloads in the
+ * following manner the VF will be allowed to enable and/or disable 0x8100 inner
+ * VLAN insertion and/or stripping via the opcodes listed above. Inner in this
+ * case means the outer most or single VLAN from the VF's perspective. This is
+ * because no outer offloads are supported. See the comments above the
+ * virtchnl_vlan_supported_caps structure for more details.
+ *
+ * virtchnl_vlan_caps.offloads.stripping_support.inner =
+ *			VIRTCHNL_VLAN_TOGGLE |
+ *			VIRTCHNL_VLAN_ETHERTYPE_8100;
+ *
+ * virtchnl_vlan_caps.offloads.insertion_support.inner =
+ *			VIRTCHNL_VLAN_TOGGLE |
+ *			VIRTCHNL_VLAN_ETHERTYPE_8100;
+ *
+ * In order to enable inner (again note that in this case inner is the outer
+ * most or single VLAN from the VF's perspective) VLAN stripping for 0x8100
+ * VLANs, the VF would populate the virtchnl_vlan_setting structure in the
+ * following manner and send the VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 message.
+ *
+ * virtchnl_vlan_setting.inner_ethertype_setting =
+ *			VIRTCHNL_VLAN_ETHERTYPE_8100;
+ *
+ * virtchnl_vlan_setting.vport_id = vport_id or vsi_id assigned to the VF on
+ * initialization.
+ *
+ * The reason that VLAN TPID(s) are not being used for the
+ * outer_ethertype_setting and inner_ethertype_setting fields is because it's
+ * possible a device could support VLAN insertion and/or stripping offload on
+ * multiple ethertypes concurrently, so this method allows a VF to request
+ * multiple ethertypes in one message using the virtchnl_vlan_support
+ * enumeration.
+ *
+ * For example, if the PF populates the virtchnl_vlan_caps.offloads in the
+ * following manner the VF will be allowed to enable 0x8100 and 0x88a8 outer
+ * VLAN insertion and stripping simultaneously. The
+ * virtchnl_vlan_caps.offloads.ethertype_match field will also have to be
+ * populated based on what the PF can support.
+ *
+ * virtchnl_vlan_caps.offloads.stripping_support.outer =
+ *			VIRTCHNL_VLAN_TOGGLE |
+ *			VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ *			VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+ *			VIRTCHNL_VLAN_ETHERTYPE_AND;
+ *
+ * virtchnl_vlan_caps.offloads.insertion_support.outer =
+ *			VIRTCHNL_VLAN_TOGGLE |
+ *			VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ *			VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+ *			VIRTCHNL_VLAN_ETHERTYPE_AND;
+ *
+ * In order to enable outer VLAN stripping for 0x8100 and 0x88a8 VLANs, the VF
+ * would populate the virthcnl_vlan_offload_structure in the following manner
+ * and send the VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 message.
+ *
+ * virtchnl_vlan_setting.outer_ethertype_setting =
+ *			VIRTHCNL_VLAN_ETHERTYPE_8100 |
+ *			VIRTHCNL_VLAN_ETHERTYPE_88A8;
+ *
+ * virtchnl_vlan_setting.vport_id = vport_id or vsi_id assigned to the VF on
+ * initialization.
+ *
+ * There is also the case where a PF and the underlying hardware can support
+ * VLAN offloads on multiple ethertypes, but not concurrently. For example, if
+ * the PF populates the virtchnl_vlan_caps.offloads in the following manner the
+ * VF will be allowed to enable and/or disable 0x8100 XOR 0x88a8 outer VLAN
+ * offloads. The ethertypes must match for stripping and insertion.
+ *
+ * virtchnl_vlan_caps.offloads.stripping_support.outer =
+ *			VIRTCHNL_VLAN_TOGGLE |
+ *			VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ *			VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+ *			VIRTCHNL_VLAN_ETHERTYPE_XOR;
+ *
+ * virtchnl_vlan_caps.offloads.insertion_support.outer =
+ *			VIRTCHNL_VLAN_TOGGLE |
+ *			VIRTCHNL_VLAN_ETHERTYPE_8100 |
+ *			VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+ *			VIRTCHNL_VLAN_ETHERTYPE_XOR;
+ *
+ * virtchnl_vlan_caps.offloads.ethertype_match =
+ *			VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION;
+ *
+ * In order to enable outer VLAN stripping for 0x88a8 VLANs, the VF would
+ * populate the virtchnl_vlan_setting structure in the following manner and send
+ * the VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2. Also, this will change the
+ * ethertype for VLAN insertion if it's enabled. So, for completeness, a
+ * VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 with the same ethertype should be sent.
+ *
+ * virtchnl_vlan_setting.outer_ethertype_setting = VIRTHCNL_VLAN_ETHERTYPE_88A8;
+ *
+ * virtchnl_vlan_setting.vport_id = vport_id or vsi_id assigned to the VF on
+ * initialization.
+ */
+struct virtchnl_vlan_setting {
+	u32 outer_ethertype_setting;
+	u32 inner_ethertype_setting;
+	u16 vport_id;
+	u8 pad[6];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vlan_setting);
+
 /* VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE
  * VF sends VSI id and flags.
  * PF returns status code in retval.
@@ -1156,6 +1509,30 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_DEL_FDIR_FILTER:
 		valid_len = sizeof(struct virtchnl_fdir_del);
 		break;
+	case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS:
+		break;
+	case VIRTCHNL_OP_ADD_VLAN_V2:
+	case VIRTCHNL_OP_DEL_VLAN_V2:
+		valid_len = sizeof(struct virtchnl_vlan_filter_list_v2);
+		if (msglen >= valid_len) {
+			struct virtchnl_vlan_filter_list_v2 *vfl =
+			    (struct virtchnl_vlan_filter_list_v2 *)msg;
+
+			valid_len += (vfl->num_elements - 1) *
+				sizeof(struct virtchnl_vlan_filter);
+
+			if (vfl->num_elements == 0) {
+				err_msg_format = true;
+				break;
+			}
+		}
+		break;
+	case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2:
+	case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2:
+	case VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2:
+	case VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2:
+		valid_len = sizeof(struct virtchnl_vlan_setting);
+		break;
 	/* These are always errors coming from the VF. */
 	case VIRTCHNL_OP_EVENT:
 	case VIRTCHNL_OP_UNKNOWN:
-- 
cgit v1.2.3


From 877fee2a0c65a3b0b6ac0e90d7d7718b5a0341d3 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 17 Dec 2021 15:15:15 +0100
Subject: PCI: Convert pci_dev_present() stub to static inline

Change the pci_dev_present() stub which is used when CONFIG_PCI is not set
from a #define to a static inline stub.

Thix should fix clang -Werror builds failing due to errors like this:

  drivers/platform/x86/thinkpad_acpi.c:4475:35:
   error: unused variable 'fwbug_cards_ids' [-Werror,-Wunused-const-variable]

Where fwbug_cards_ids is an array of pci_device_id passed to
pci_dev_present() during a quirk check.

Link: https://lore.kernel.org/r/20211217141515.379586-1-hdegoede@redhat.com
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: platform-driver-x86@vger.kernel.org
---
 include/linux/pci.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 18a75c8e615c..7d825637d7ca 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1775,7 +1775,10 @@ static inline struct pci_dev *pci_get_class(unsigned int class,
 					    struct pci_dev *from)
 { return NULL; }
 
-#define pci_dev_present(ids)	(0)
+
+static inline int pci_dev_present(const struct pci_device_id *ids)
+{ return 0; }
+
 #define no_pci_devices()	(1)
 #define pci_dev_put(dev)	do { } while (0)
 
-- 
cgit v1.2.3


From ec624fe740b416fb68d536b37fb8eef46f90b5c2 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Tue, 14 Dec 2021 19:24:33 +0200
Subject: net/sched: Extend qdisc control block with tc control block

BPF layer extends the qdisc control block via struct bpf_skb_data_end
and because of that there is no more room to add variables to the
qdisc layer control block without going over the skb->cb size.

Extend the qdisc control block with a tc control block,
and move all tc related variables to there as a pre-step for
extending the tc control block with additional members.

Signed-off-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/pkt_sched.h   | 15 +++++++++++++++
 include/net/sch_generic.h |  2 --
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index bf79f3a890af..05f18e81f3e8 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -193,4 +193,19 @@ static inline void skb_txtime_consumed(struct sk_buff *skb)
 	skb->tstamp = ktime_set(0, 0);
 }
 
+struct tc_skb_cb {
+	struct qdisc_skb_cb qdisc_cb;
+
+	u16 mru;
+	bool post_ct;
+};
+
+static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
+{
+	struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
+
+	BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
+	return cb;
+}
+
 #endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 22179b2fda72..c70e6d2b2fdd 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -447,8 +447,6 @@ struct qdisc_skb_cb {
 	};
 #define QDISC_CB_PRIV_LEN 20
 	unsigned char		data[QDISC_CB_PRIV_LEN];
-	u16			mru;
-	bool			post_ct;
 };
 
 typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
-- 
cgit v1.2.3


From 3849595866166b23bf6a0cb9ff87e06423167f67 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Tue, 14 Dec 2021 19:24:34 +0200
Subject: net/sched: flow_dissector: Fix matching on zone id for invalid conns

If ct rejects a flow, it removes the conntrack info from the skb.
act_ct sets the post_ct variable so the dissector will see this case
as an +tracked +invalid state, but the zone id is lost with the
conntrack info.

To restore the zone id on such cases, set the last executed zone,
via the tc control block, when passing ct, and read it back in the
dissector if there is no ct info on the skb (invalid connection).

Fixes: 7baf2429a1a9 ("net/sched: cls_flower add CT_FLAGS_INVALID flag support")
Signed-off-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h  | 2 +-
 include/net/pkt_sched.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c8cb7e697d47..2ecf8cfd2223 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1380,7 +1380,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
 		    struct flow_dissector *flow_dissector,
 		    void *target_container,
 		    u16 *ctinfo_map, size_t mapsize,
-		    bool post_ct);
+		    bool post_ct, u16 zone);
 void
 skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
 			     struct flow_dissector *flow_dissector,
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 05f18e81f3e8..9e71691c491b 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -198,6 +198,7 @@ struct tc_skb_cb {
 
 	u16 mru;
 	bool post_ct;
+	u16 zone; /* Only valid if post_ct = true */
 };
 
 static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
-- 
cgit v1.2.3


From 635d448a1cce4b4ebee52b351052c70434fa90ea Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Tue, 14 Dec 2021 19:24:35 +0200
Subject: net: openvswitch: Fix matching zone id for invalid conns arriving
 from tc

Zone id is not restored if we passed ct and ct rejected the connection,
as there is no ct info on the skb.

Save the zone from tc skb cb to tc skb extension and pass it on to
ovs, use that info to restore the zone id for invalid connections.

Fixes: d29334c15d33 ("net/sched: act_api: fix miss set post_ct for ovs after do conntrack in act_ct")
Signed-off-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2ecf8cfd2223..4507d77d6941 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -286,6 +286,7 @@ struct nf_bridge_info {
 struct tc_skb_ext {
 	__u32 chain;
 	__u16 mru;
+	__u16 zone;
 	bool post_ct;
 };
 #endif
-- 
cgit v1.2.3


From 6e478521df535b9d5ef5eb84d4352f235bbbef99 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 30 Jul 2021 09:56:05 -0400
Subject: iomap,xfs: Convert ->discard_page to ->discard_folio

XFS has the only implementation of ->discard_page today, so convert it
to use folios in the same patch as converting the API.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 include/linux/iomap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 29491fb9c5ba..5ef5088dbbd8 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -285,7 +285,7 @@ struct iomap_writeback_ops {
 	 * Optional, allows the file system to discard state on a page where
 	 * we failed to submit any I/O.
 	 */
-	void (*discard_page)(struct page *page, loff_t fileoff);
+	void (*discard_folio)(struct folio *folio, loff_t pos);
 };
 
 struct iomap_writepage_ctx {
-- 
cgit v1.2.3


From e17f7a0bc4daa44a4809f5f2f947aa2aa74d1369 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 15 Dec 2021 09:45:05 +0100
Subject: uio: remove copy_from_iter_flushcache() and copy_mc_to_iter()

These two wrappers are never used.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211215084508.435401-2-hch@lst.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/uio.h | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 6350354f97e9..494d552c1d66 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -196,7 +196,7 @@ bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 /*
  * Note, users like pmem that depend on the stricter semantics of
- * copy_from_iter_flushcache() than copy_from_iter_nocache() must check for
+ * _copy_from_iter_flushcache() than _copy_from_iter_nocache() must check for
  * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the
  * destination is flushed from the cache on return.
  */
@@ -211,24 +211,6 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
 #define _copy_mc_to_iter _copy_to_iter
 #endif
 
-static __always_inline __must_check
-size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
-{
-	if (unlikely(!check_copy_size(addr, bytes, false)))
-		return 0;
-	else
-		return _copy_from_iter_flushcache(addr, bytes, i);
-}
-
-static __always_inline __must_check
-size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
-{
-	if (unlikely(!check_copy_size(addr, bytes, true)))
-		return 0;
-	else
-		return _copy_mc_to_iter(addr, bytes, i);
-}
-
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
-- 
cgit v1.2.3


From fd1d00ec92002d8fe28ca981a72395eaa7ae3d11 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 15 Dec 2021 09:45:06 +0100
Subject: dax: simplify dax_synchronous and set_dax_synchronous

Remove the pointless wrappers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Pankaj Gupta <pankaj.gupta@ionos.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://lore.kernel.org/r/20211215084508.435401-3-hch@lst.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dax.h | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index 87ae4c9b1d65..3bd1fdb5d5f4 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -48,16 +48,8 @@ void put_dax(struct dax_device *dax_dev);
 void kill_dax(struct dax_device *dax_dev);
 void dax_write_cache(struct dax_device *dax_dev, bool wc);
 bool dax_write_cache_enabled(struct dax_device *dax_dev);
-bool __dax_synchronous(struct dax_device *dax_dev);
-static inline bool dax_synchronous(struct dax_device *dax_dev)
-{
-	return  __dax_synchronous(dax_dev);
-}
-void __set_dax_synchronous(struct dax_device *dax_dev);
-static inline void set_dax_synchronous(struct dax_device *dax_dev)
-{
-	__set_dax_synchronous(dax_dev);
-}
+bool dax_synchronous(struct dax_device *dax_dev);
+void set_dax_synchronous(struct dax_device *dax_dev);
 /*
  * Check if given mapping is supported by the file / underlying device.
  */
-- 
cgit v1.2.3


From 30c6828a17a572aeb9e3a3bacce05fdcf1106541 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 15 Dec 2021 09:45:07 +0100
Subject: dax: remove the DAXDEV_F_SYNC flag

Remove the DAXDEV_F_SYNC flag and thus the flags argument to alloc_dax and
just let the drivers call set_dax_synchronous directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Pankaj Gupta <pankaj.gupta@ionos.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://lore.kernel.org/r/20211215084508.435401-4-hch@lst.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dax.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index 3bd1fdb5d5f4..c04f46478e3b 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -6,9 +6,6 @@
 #include <linux/mm.h>
 #include <linux/radix-tree.h>
 
-/* Flag for synchronous flush */
-#define DAXDEV_F_SYNC (1UL << 0)
-
 typedef unsigned long dax_entry_t;
 
 struct dax_device;
@@ -42,8 +39,7 @@ struct dax_operations {
 };
 
 #if IS_ENABLED(CONFIG_DAX)
-struct dax_device *alloc_dax(void *private, const struct dax_operations *ops,
-		unsigned long flags);
+struct dax_device *alloc_dax(void *private, const struct dax_operations *ops);
 void put_dax(struct dax_device *dax_dev);
 void kill_dax(struct dax_device *dax_dev);
 void dax_write_cache(struct dax_device *dax_dev, bool wc);
@@ -64,7 +60,7 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
 }
 #else
 static inline struct dax_device *alloc_dax(void *private,
-		const struct dax_operations *ops, unsigned long flags)
+		const struct dax_operations *ops)
 {
 	/*
 	 * Callers should check IS_ENABLED(CONFIG_DAX) to know if this
-- 
cgit v1.2.3


From 7ac5360cd4d02cc7e0eaf10867f599e041822f12 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 15 Dec 2021 09:45:08 +0100
Subject: dax: remove the copy_from_iter and copy_to_iter methods

These methods indirect the actual DAX read/write path.  In the end pmem
uses magic flush and mc safe variants and fuse and dcssblk use plain ones
while device mapper picks redirects to the underlying device.

Add set_dax_nocache() and set_dax_nomc() APIs to control which copy
routines are used to remove indirect call from the read/write fast path
as well as a lot of boilerplate code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Vivek Goyal <vgoyal@redhat.com> [virtiofs]
Link: https://lore.kernel.org/r/20211215084508.435401-5-hch@lst.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dax.h           | 9 +++------
 include/linux/device-mapper.h | 4 ----
 2 files changed, 3 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index c04f46478e3b..9fc5f99a0ae2 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -28,12 +28,6 @@ struct dax_operations {
 	 */
 	bool (*dax_supported)(struct dax_device *, struct block_device *, int,
 			sector_t, sector_t);
-	/* copy_from_iter: required operation for fs-dax direct-i/o */
-	size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
-			struct iov_iter *);
-	/* copy_to_iter: required operation for fs-dax direct-i/o */
-	size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t,
-			struct iov_iter *);
 	/* zero_page_range: required operation. Zero page range   */
 	int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
 };
@@ -95,6 +89,9 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
 }
 #endif
 
+void set_dax_nocache(struct dax_device *dax_dev);
+void set_dax_nomc(struct dax_device *dax_dev);
+
 struct writeback_control;
 #if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
 int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index a7df155ea49b..b26fecf6c8e8 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -147,8 +147,6 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
  */
 typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
 		long nr_pages, void **kaddr, pfn_t *pfn);
-typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff,
-		void *addr, size_t bytes, struct iov_iter *i);
 typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff,
 		size_t nr_pages);
 
@@ -200,8 +198,6 @@ struct target_type {
 	dm_iterate_devices_fn iterate_devices;
 	dm_io_hints_fn io_hints;
 	dm_dax_direct_access_fn direct_access;
-	dm_dax_copy_iter_fn dax_copy_from_iter;
-	dm_dax_copy_iter_fn dax_copy_to_iter;
 	dm_dax_zero_page_range_fn dax_zero_page_range;
 
 	/* For internal device-mapper use. */
-- 
cgit v1.2.3


From d639b9d13a39cf15639cbe6e8b2c43eb60148a73 Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Thu, 16 Dec 2021 16:31:44 -0800
Subject: bpf: Introduce composable reg, ret and arg types.

There are some common properties shared between bpf reg, ret and arg
values. For instance, a value may be a NULL pointer, or a pointer to
a read-only memory. Previously, to express these properties, enumeration
was used. For example, in order to test whether a reg value can be NULL,
reg_type_may_be_null() simply enumerates all types that are possibly
NULL. The problem of this approach is that it's not scalable and causes
a lot of duplication. These properties can be combined, for example, a
type could be either MAYBE_NULL or RDONLY, or both.

This patch series rewrites the layout of reg_type, arg_type and
ret_type, so that common properties can be extracted and represented as
composable flag. For example, one can write

 ARG_PTR_TO_MEM | PTR_MAYBE_NULL

which is equivalent to the previous

 ARG_PTR_TO_MEM_OR_NULL

The type ARG_PTR_TO_MEM are called "base type" in this patch. Base
types can be extended with flags. A flag occupies the higher bits while
base types sits in the lower bits.

This patch in particular sets up a set of macro for this purpose. The
following patches will rewrite arg_types, ret_types and reg_types
respectively.

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217003152.48334-2-haoluo@google.com
---
 include/linux/bpf.h          | 42 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/bpf_verifier.h | 13 +++++++++++++
 2 files changed, 55 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 965fffaf0308..41bb3687cc85 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -297,6 +297,29 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
 
 extern const struct bpf_map_ops bpf_map_offload_ops;
 
+/* bpf_type_flag contains a set of flags that are applicable to the values of
+ * arg_type, ret_type and reg_type. For example, a pointer value may be null,
+ * or a memory is read-only. We classify types into two categories: base types
+ * and extended types. Extended types are base types combined with a type flag.
+ *
+ * Currently there are no more than 32 base types in arg_type, ret_type and
+ * reg_types.
+ */
+#define BPF_BASE_TYPE_BITS	8
+
+enum bpf_type_flag {
+	/* PTR may be NULL. */
+	PTR_MAYBE_NULL		= BIT(0 + BPF_BASE_TYPE_BITS),
+
+	__BPF_TYPE_LAST_FLAG	= PTR_MAYBE_NULL,
+};
+
+/* Max number of base types. */
+#define BPF_BASE_TYPE_LIMIT	(1UL << BPF_BASE_TYPE_BITS)
+
+/* Max number of all types. */
+#define BPF_TYPE_LIMIT		(__BPF_TYPE_LAST_FLAG | (__BPF_TYPE_LAST_FLAG - 1))
+
 /* function argument constraints */
 enum bpf_arg_type {
 	ARG_DONTCARE = 0,	/* unused argument in helper function */
@@ -343,7 +366,13 @@ enum bpf_arg_type {
 	ARG_PTR_TO_CONST_STR,	/* pointer to a null terminated read-only string */
 	ARG_PTR_TO_TIMER,	/* pointer to bpf_timer */
 	__BPF_ARG_TYPE_MAX,
+
+	/* This must be the last entry. Its purpose is to ensure the enum is
+	 * wide enough to hold the higher bits reserved for bpf_type_flag.
+	 */
+	__BPF_ARG_TYPE_LIMIT	= BPF_TYPE_LIMIT,
 };
+static_assert(__BPF_ARG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);
 
 /* type of values returned from helper functions */
 enum bpf_return_type {
@@ -359,7 +388,14 @@ enum bpf_return_type {
 	RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
 	RET_PTR_TO_MEM_OR_BTF_ID,	/* returns a pointer to a valid memory or a btf_id */
 	RET_PTR_TO_BTF_ID,		/* returns a pointer to a btf_id */
+	__BPF_RET_TYPE_MAX,
+
+	/* This must be the last entry. Its purpose is to ensure the enum is
+	 * wide enough to hold the higher bits reserved for bpf_type_flag.
+	 */
+	__BPF_RET_TYPE_LIMIT	= BPF_TYPE_LIMIT,
 };
+static_assert(__BPF_RET_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
  * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL
@@ -461,7 +497,13 @@ enum bpf_reg_type {
 	PTR_TO_FUNC,		 /* reg points to a bpf program function */
 	PTR_TO_MAP_KEY,		 /* reg points to a map element key */
 	__BPF_REG_TYPE_MAX,
+
+	/* This must be the last entry. Its purpose is to ensure the enum is
+	 * wide enough to hold the higher bits reserved for bpf_type_flag.
+	 */
+	__BPF_REG_TYPE_LIMIT	= BPF_TYPE_LIMIT,
 };
+static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);
 
 /* The information passed from prog-specific *_is_valid_access
  * back to the verifier.
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index ee931398f311..34e4ceaca3c7 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -546,5 +546,18 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
 			    struct bpf_attach_target_info *tgt_info);
 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab);
 
+#define BPF_BASE_TYPE_MASK	GENMASK(BPF_BASE_TYPE_BITS - 1, 0)
+
+/* extract base type from bpf_{arg, return, reg}_type. */
+static inline u32 base_type(u32 type)
+{
+	return type & BPF_BASE_TYPE_MASK;
+}
+
+/* extract flags from an extended type. See bpf_type_flag in bpf.h. */
+static inline u32 type_flag(u32 type)
+{
+	return type & ~BPF_BASE_TYPE_MASK;
+}
 
 #endif /* _LINUX_BPF_VERIFIER_H */
-- 
cgit v1.2.3


From 48946bd6a5d695c50b34546864b79c1f910a33c1 Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Thu, 16 Dec 2021 16:31:45 -0800
Subject: bpf: Replace ARG_XXX_OR_NULL with ARG_XXX | PTR_MAYBE_NULL

We have introduced a new type to make bpf_arg composable, by
reserving high bits of bpf_arg to represent flags of a type.

One of the flags is PTR_MAYBE_NULL which indicates a pointer
may be NULL. When applying this flag to an arg_type, it means
the arg can take NULL pointer. This patch switches the
qualified arg_types to use this flag. The arg_types changed
in this patch include:

1. ARG_PTR_TO_MAP_VALUE_OR_NULL
2. ARG_PTR_TO_MEM_OR_NULL
3. ARG_PTR_TO_CTX_OR_NULL
4. ARG_PTR_TO_SOCKET_OR_NULL
5. ARG_PTR_TO_ALLOC_MEM_OR_NULL
6. ARG_PTR_TO_STACK_OR_NULL

This patch does not eliminate the use of these arg_types, instead
it makes them an alias to the 'ARG_XXX | PTR_MAYBE_NULL'.

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217003152.48334-3-haoluo@google.com
---
 include/linux/bpf.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 41bb3687cc85..765bd7cc4272 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -331,13 +331,11 @@ enum bpf_arg_type {
 	ARG_PTR_TO_MAP_KEY,	/* pointer to stack used as map key */
 	ARG_PTR_TO_MAP_VALUE,	/* pointer to stack used as map value */
 	ARG_PTR_TO_UNINIT_MAP_VALUE,	/* pointer to valid memory used to store a map value */
-	ARG_PTR_TO_MAP_VALUE_OR_NULL,	/* pointer to stack used as map value or NULL */
 
 	/* the following constraints used to prototype bpf_memcmp() and other
 	 * functions that access data on eBPF program stack
 	 */
 	ARG_PTR_TO_MEM,		/* pointer to valid memory (stack, packet, map value) */
-	ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */
 	ARG_PTR_TO_UNINIT_MEM,	/* pointer to memory does not need to be initialized,
 				 * helper function must fill all bytes or clear
 				 * them in error case.
@@ -347,26 +345,31 @@ enum bpf_arg_type {
 	ARG_CONST_SIZE_OR_ZERO,	/* number of bytes accessed from memory or 0 */
 
 	ARG_PTR_TO_CTX,		/* pointer to context */
-	ARG_PTR_TO_CTX_OR_NULL,	/* pointer to context or NULL */
 	ARG_ANYTHING,		/* any (initialized) argument is ok */
 	ARG_PTR_TO_SPIN_LOCK,	/* pointer to bpf_spin_lock */
 	ARG_PTR_TO_SOCK_COMMON,	/* pointer to sock_common */
 	ARG_PTR_TO_INT,		/* pointer to int */
 	ARG_PTR_TO_LONG,	/* pointer to long */
 	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock (fullsock) */
-	ARG_PTR_TO_SOCKET_OR_NULL,	/* pointer to bpf_sock (fullsock) or NULL */
 	ARG_PTR_TO_BTF_ID,	/* pointer to in-kernel struct */
 	ARG_PTR_TO_ALLOC_MEM,	/* pointer to dynamically allocated memory */
-	ARG_PTR_TO_ALLOC_MEM_OR_NULL,	/* pointer to dynamically allocated memory or NULL */
 	ARG_CONST_ALLOC_SIZE_OR_ZERO,	/* number of allocated bytes requested */
 	ARG_PTR_TO_BTF_ID_SOCK_COMMON,	/* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
 	ARG_PTR_TO_PERCPU_BTF_ID,	/* pointer to in-kernel percpu type */
 	ARG_PTR_TO_FUNC,	/* pointer to a bpf program function */
-	ARG_PTR_TO_STACK_OR_NULL,	/* pointer to stack or NULL */
+	ARG_PTR_TO_STACK,	/* pointer to stack */
 	ARG_PTR_TO_CONST_STR,	/* pointer to a null terminated read-only string */
 	ARG_PTR_TO_TIMER,	/* pointer to bpf_timer */
 	__BPF_ARG_TYPE_MAX,
 
+	/* Extended arg_types. */
+	ARG_PTR_TO_MAP_VALUE_OR_NULL	= PTR_MAYBE_NULL | ARG_PTR_TO_MAP_VALUE,
+	ARG_PTR_TO_MEM_OR_NULL		= PTR_MAYBE_NULL | ARG_PTR_TO_MEM,
+	ARG_PTR_TO_CTX_OR_NULL		= PTR_MAYBE_NULL | ARG_PTR_TO_CTX,
+	ARG_PTR_TO_SOCKET_OR_NULL	= PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET,
+	ARG_PTR_TO_ALLOC_MEM_OR_NULL	= PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM,
+	ARG_PTR_TO_STACK_OR_NULL	= PTR_MAYBE_NULL | ARG_PTR_TO_STACK,
+
 	/* This must be the last entry. Its purpose is to ensure the enum is
 	 * wide enough to hold the higher bits reserved for bpf_type_flag.
 	 */
-- 
cgit v1.2.3


From 3c4807322660d4290ac9062c034aed6b87243861 Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Thu, 16 Dec 2021 16:31:46 -0800
Subject: bpf: Replace RET_XXX_OR_NULL with RET_XXX | PTR_MAYBE_NULL

We have introduced a new type to make bpf_ret composable, by
reserving high bits to represent flags.

One of the flag is PTR_MAYBE_NULL, which indicates a pointer
may be NULL. When applying this flag to ret_types, it means
the returned value could be a NULL pointer. This patch
switches the qualified arg_types to use this flag.
The ret_types changed in this patch include:

1. RET_PTR_TO_MAP_VALUE_OR_NULL
2. RET_PTR_TO_SOCKET_OR_NULL
3. RET_PTR_TO_TCP_SOCK_OR_NULL
4. RET_PTR_TO_SOCK_COMMON_OR_NULL
5. RET_PTR_TO_ALLOC_MEM_OR_NULL
6. RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL
7. RET_PTR_TO_BTF_ID_OR_NULL

This patch doesn't eliminate the use of these names, instead
it makes them aliases to 'RET_PTR_TO_XXX | PTR_MAYBE_NULL'.

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217003152.48334-4-haoluo@google.com
---
 include/linux/bpf.h | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 765bd7cc4272..975a1d5951bd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -382,17 +382,22 @@ enum bpf_return_type {
 	RET_INTEGER,			/* function returns integer */
 	RET_VOID,			/* function doesn't return anything */
 	RET_PTR_TO_MAP_VALUE,		/* returns a pointer to map elem value */
-	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
-	RET_PTR_TO_SOCKET_OR_NULL,	/* returns a pointer to a socket or NULL */
-	RET_PTR_TO_TCP_SOCK_OR_NULL,	/* returns a pointer to a tcp_sock or NULL */
-	RET_PTR_TO_SOCK_COMMON_OR_NULL,	/* returns a pointer to a sock_common or NULL */
-	RET_PTR_TO_ALLOC_MEM_OR_NULL,	/* returns a pointer to dynamically allocated memory or NULL */
-	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
-	RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
+	RET_PTR_TO_SOCKET,		/* returns a pointer to a socket */
+	RET_PTR_TO_TCP_SOCK,		/* returns a pointer to a tcp_sock */
+	RET_PTR_TO_SOCK_COMMON,		/* returns a pointer to a sock_common */
+	RET_PTR_TO_ALLOC_MEM,		/* returns a pointer to dynamically allocated memory */
 	RET_PTR_TO_MEM_OR_BTF_ID,	/* returns a pointer to a valid memory or a btf_id */
 	RET_PTR_TO_BTF_ID,		/* returns a pointer to a btf_id */
 	__BPF_RET_TYPE_MAX,
 
+	/* Extended ret_types. */
+	RET_PTR_TO_MAP_VALUE_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_MAP_VALUE,
+	RET_PTR_TO_SOCKET_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_SOCKET,
+	RET_PTR_TO_TCP_SOCK_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK,
+	RET_PTR_TO_SOCK_COMMON_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON,
+	RET_PTR_TO_ALLOC_MEM_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM,
+	RET_PTR_TO_BTF_ID_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,
+
 	/* This must be the last entry. Its purpose is to ensure the enum is
 	 * wide enough to hold the higher bits reserved for bpf_type_flag.
 	 */
-- 
cgit v1.2.3


From c25b2ae136039ffa820c26138ed4a5e5f3ab3841 Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Thu, 16 Dec 2021 16:31:47 -0800
Subject: bpf: Replace PTR_TO_XXX_OR_NULL with PTR_TO_XXX | PTR_MAYBE_NULL

We have introduced a new type to make bpf_reg composable, by
allocating bits in the type to represent flags.

One of the flags is PTR_MAYBE_NULL which indicates a pointer
may be NULL. This patch switches the qualified reg_types to
use this flag. The reg_types changed in this patch include:

1. PTR_TO_MAP_VALUE_OR_NULL
2. PTR_TO_SOCKET_OR_NULL
3. PTR_TO_SOCK_COMMON_OR_NULL
4. PTR_TO_TCP_SOCK_OR_NULL
5. PTR_TO_BTF_ID_OR_NULL
6. PTR_TO_MEM_OR_NULL
7. PTR_TO_RDONLY_BUF_OR_NULL
8. PTR_TO_RDWR_BUF_OR_NULL

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/r/20211217003152.48334-5-haoluo@google.com
---
 include/linux/bpf.h          | 18 +++++++++---------
 include/linux/bpf_verifier.h |  4 ++++
 2 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 975a1d5951bd..c3de62267b84 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -465,18 +465,15 @@ enum bpf_reg_type {
 	PTR_TO_CTX,		 /* reg points to bpf_context */
 	CONST_PTR_TO_MAP,	 /* reg points to struct bpf_map */
 	PTR_TO_MAP_VALUE,	 /* reg points to map element value */
-	PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
+	PTR_TO_MAP_KEY,		 /* reg points to a map element key */
 	PTR_TO_STACK,		 /* reg == frame_pointer + offset */
 	PTR_TO_PACKET_META,	 /* skb->data - meta_len */
 	PTR_TO_PACKET,		 /* reg points to skb->data */
 	PTR_TO_PACKET_END,	 /* skb->data + headlen */
 	PTR_TO_FLOW_KEYS,	 /* reg points to bpf_flow_keys */
 	PTR_TO_SOCKET,		 /* reg points to struct bpf_sock */
-	PTR_TO_SOCKET_OR_NULL,	 /* reg points to struct bpf_sock or NULL */
 	PTR_TO_SOCK_COMMON,	 /* reg points to sock_common */
-	PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
 	PTR_TO_TCP_SOCK,	 /* reg points to struct tcp_sock */
-	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
 	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
 	PTR_TO_XDP_SOCK,	 /* reg points to struct xdp_sock */
 	/* PTR_TO_BTF_ID points to a kernel struct that does not need
@@ -494,18 +491,21 @@ enum bpf_reg_type {
 	 * been checked for null. Used primarily to inform the verifier
 	 * an explicit null check is required for this struct.
 	 */
-	PTR_TO_BTF_ID_OR_NULL,
 	PTR_TO_MEM,		 /* reg points to valid memory region */
-	PTR_TO_MEM_OR_NULL,	 /* reg points to valid memory region or NULL */
 	PTR_TO_RDONLY_BUF,	 /* reg points to a readonly buffer */
-	PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
 	PTR_TO_RDWR_BUF,	 /* reg points to a read/write buffer */
-	PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
 	PTR_TO_PERCPU_BTF_ID,	 /* reg points to a percpu kernel variable */
 	PTR_TO_FUNC,		 /* reg points to a bpf program function */
-	PTR_TO_MAP_KEY,		 /* reg points to a map element key */
 	__BPF_REG_TYPE_MAX,
 
+	/* Extended reg_types. */
+	PTR_TO_MAP_VALUE_OR_NULL	= PTR_MAYBE_NULL | PTR_TO_MAP_VALUE,
+	PTR_TO_SOCKET_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_SOCKET,
+	PTR_TO_SOCK_COMMON_OR_NULL	= PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON,
+	PTR_TO_TCP_SOCK_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_TCP_SOCK,
+	PTR_TO_BTF_ID_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_BTF_ID,
+	PTR_TO_MEM_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_MEM,
+
 	/* This must be the last entry. Its purpose is to ensure the enum is
 	 * wide enough to hold the higher bits reserved for bpf_type_flag.
 	 */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 34e4ceaca3c7..143401d4c9d9 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -18,6 +18,8 @@
  * that converting umax_value to int cannot overflow.
  */
 #define BPF_MAX_VAR_SIZ	(1 << 29)
+/* size of type_str_buf in bpf_verifier. */
+#define TYPE_STR_BUF_LEN 64
 
 /* Liveness marks, used for registers and spilled-regs (in stack slots).
  * Read marks propagate upwards until they find a write mark; they record that
@@ -484,6 +486,8 @@ struct bpf_verifier_env {
 	/* Same as scratched_regs but for stack slots */
 	u64 scratched_stack_slots;
 	u32 prev_log_len, prev_insn_print_len;
+	/* buffer used in reg_type_str() to generate reg_type string */
+	char type_str_buf[TYPE_STR_BUF_LEN];
 };
 
 __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
-- 
cgit v1.2.3


From 20b2aff4bc15bda809f994761d5719827d66c0b4 Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Thu, 16 Dec 2021 16:31:48 -0800
Subject: bpf: Introduce MEM_RDONLY flag

This patch introduce a flag MEM_RDONLY to tag a reg value
pointing to read-only memory. It makes the following changes:

1. PTR_TO_RDWR_BUF -> PTR_TO_BUF
2. PTR_TO_RDONLY_BUF -> PTR_TO_BUF | MEM_RDONLY

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217003152.48334-6-haoluo@google.com
---
 include/linux/bpf.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c3de62267b84..126048110bdb 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -311,7 +311,10 @@ enum bpf_type_flag {
 	/* PTR may be NULL. */
 	PTR_MAYBE_NULL		= BIT(0 + BPF_BASE_TYPE_BITS),
 
-	__BPF_TYPE_LAST_FLAG	= PTR_MAYBE_NULL,
+	/* MEM is read-only. */
+	MEM_RDONLY		= BIT(1 + BPF_BASE_TYPE_BITS),
+
+	__BPF_TYPE_LAST_FLAG	= MEM_RDONLY,
 };
 
 /* Max number of base types. */
@@ -492,8 +495,7 @@ enum bpf_reg_type {
 	 * an explicit null check is required for this struct.
 	 */
 	PTR_TO_MEM,		 /* reg points to valid memory region */
-	PTR_TO_RDONLY_BUF,	 /* reg points to a readonly buffer */
-	PTR_TO_RDWR_BUF,	 /* reg points to a read/write buffer */
+	PTR_TO_BUF,		 /* reg points to a read/write buffer */
 	PTR_TO_PERCPU_BTF_ID,	 /* reg points to a percpu kernel variable */
 	PTR_TO_FUNC,		 /* reg points to a bpf program function */
 	__BPF_REG_TYPE_MAX,
-- 
cgit v1.2.3


From cf9f2f8d62eca810afbd1ee6cc0800202b000e57 Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Thu, 16 Dec 2021 16:31:49 -0800
Subject: bpf: Convert PTR_TO_MEM_OR_NULL to composable types.

Remove PTR_TO_MEM_OR_NULL and replace it with PTR_TO_MEM combined with
flag PTR_MAYBE_NULL.

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217003152.48334-7-haoluo@google.com
---
 include/linux/bpf.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 126048110bdb..567d83bf28f9 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -506,7 +506,6 @@ enum bpf_reg_type {
 	PTR_TO_SOCK_COMMON_OR_NULL	= PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON,
 	PTR_TO_TCP_SOCK_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_TCP_SOCK,
 	PTR_TO_BTF_ID_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_BTF_ID,
-	PTR_TO_MEM_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_MEM,
 
 	/* This must be the last entry. Its purpose is to ensure the enum is
 	 * wide enough to hold the higher bits reserved for bpf_type_flag.
-- 
cgit v1.2.3


From 216e3cd2f28dbbf1fe86848e0e29e6693b9f0a20 Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Thu, 16 Dec 2021 16:31:51 -0800
Subject: bpf: Add MEM_RDONLY for helper args that are pointers to rdonly mem.

Some helper functions may modify its arguments, for example,
bpf_d_path, bpf_get_stack etc. Previously, their argument types
were marked as ARG_PTR_TO_MEM, which is compatible with read-only
mem types, such as PTR_TO_RDONLY_BUF. Therefore it's legitimate,
but technically incorrect, to modify a read-only memory by passing
it into one of such helper functions.

This patch tags the bpf_args compatible with immutable memory with
MEM_RDONLY flag. The arguments that don't have this flag will be
only compatible with mutable memory types, preventing the helper
from modifying a read-only memory. The bpf_args that have
MEM_RDONLY are compatible with both mutable memory and immutable
memory.

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217003152.48334-9-haoluo@google.com
---
 include/linux/bpf.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 567d83bf28f9..26753139d5b4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -311,7 +311,9 @@ enum bpf_type_flag {
 	/* PTR may be NULL. */
 	PTR_MAYBE_NULL		= BIT(0 + BPF_BASE_TYPE_BITS),
 
-	/* MEM is read-only. */
+	/* MEM is read-only. When applied on bpf_arg, it indicates the arg is
+	 * compatible with both mutable and immutable memory.
+	 */
 	MEM_RDONLY		= BIT(1 + BPF_BASE_TYPE_BITS),
 
 	__BPF_TYPE_LAST_FLAG	= MEM_RDONLY,
-- 
cgit v1.2.3


From 5a9959008fb63191538ab0f7800f4cf26afe7750 Mon Sep 17 00:00:00 2001
From: Baowen Zheng <baowen.zheng@corigine.com>
Date: Fri, 17 Dec 2021 19:16:19 +0100
Subject: flow_offload: add index to flow_action_entry structure

Add index to flow_action_entry structure and delete index from police and
gate child structure.

We make this change to offload tc action for driver to identify a tc
action.

Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_offload.h   | 3 +--
 include/net/tc_act/tc_gate.h | 5 -----
 2 files changed, 1 insertion(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 3961461d9c8b..2271da5aa8ee 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -197,6 +197,7 @@ void flow_action_cookie_destroy(struct flow_action_cookie *cookie);
 
 struct flow_action_entry {
 	enum flow_action_id		id;
+	u32				hw_index;
 	enum flow_action_hw_stats	hw_stats;
 	action_destr			destructor;
 	void				*destructor_priv;
@@ -232,7 +233,6 @@ struct flow_action_entry {
 			bool			truncate;
 		} sample;
 		struct {				/* FLOW_ACTION_POLICE */
-			u32			index;
 			u32			burst;
 			u64			rate_bytes_ps;
 			u64			burst_pkt;
@@ -267,7 +267,6 @@ struct flow_action_entry {
 			u8		ttl;
 		} mpls_mangle;
 		struct {
-			u32		index;
 			s32		prio;
 			u64		basetime;
 			u64		cycletime;
diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h
index 8bc6be81a7ad..c8fa11ebb397 100644
--- a/include/net/tc_act/tc_gate.h
+++ b/include/net/tc_act/tc_gate.h
@@ -60,11 +60,6 @@ static inline bool is_tcf_gate(const struct tc_action *a)
 	return false;
 }
 
-static inline u32 tcf_gate_index(const struct tc_action *a)
-{
-	return a->tcfa_index;
-}
-
 static inline s32 tcf_gate_prio(const struct tc_action *a)
 {
 	s32 tcfg_prio;
-- 
cgit v1.2.3


From 9c1c0e124ca25589e6cf040e105ab0857f9e9c3e Mon Sep 17 00:00:00 2001
From: Baowen Zheng <baowen.zheng@corigine.com>
Date: Fri, 17 Dec 2021 19:16:20 +0100
Subject: flow_offload: rename offload functions with offload instead of flow

To improves readability, we rename offload functions with offload instead
of flow.

The term flow is related to exact matches, so we rename these functions
with offload.

We make this change to facilitate single action offload functions naming.

Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index cebc1bd713b6..5d4ff76d37e2 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -536,9 +536,9 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
 	return ifindex == skb->skb_iif;
 }
 
-int tc_setup_flow_action(struct flow_action *flow_action,
-			 const struct tcf_exts *exts);
-void tc_cleanup_flow_action(struct flow_action *flow_action);
+int tc_setup_offload_action(struct flow_action *flow_action,
+			    const struct tcf_exts *exts);
+void tc_cleanup_offload_action(struct flow_action *flow_action);
 
 int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
 		     void *type_data, bool err_stop, bool rtnl_held);
-- 
cgit v1.2.3


From c54e1d920f04d528ab558f09326a78d2ae59e323 Mon Sep 17 00:00:00 2001
From: Baowen Zheng <baowen.zheng@corigine.com>
Date: Fri, 17 Dec 2021 19:16:21 +0100
Subject: flow_offload: add ops to tc_action_ops for flow action setup

Add a new ops to tc_action_ops for flow action setup.

Refactor function tc_setup_flow_action to use this new ops.

We make this change to facilitate to add standalone action module.

We will also use this ops to offload action independent of filter
in following patch.

Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index b5b624c7e488..b418bb0e44e0 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -88,6 +88,16 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm)
 	dtm->expires = jiffies_to_clock_t(stm->expires);
 }
 
+static inline enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats)
+{
+	if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY))
+		return FLOW_ACTION_HW_STATS_DONT_CARE;
+	else if (!hw_stats)
+		return FLOW_ACTION_HW_STATS_DISABLED;
+
+	return hw_stats;
+}
+
 #ifdef CONFIG_NET_CLS_ACT
 
 #define ACT_P_CREATED 1
@@ -121,6 +131,8 @@ struct tc_action_ops {
 	struct psample_group *
 	(*get_psample_group)(const struct tc_action *a,
 			     tc_action_priv_destructor *destructor);
+	int     (*offload_act_setup)(struct tc_action *act, void *entry_data,
+				     u32 *index_inc, bool bind);
 };
 
 struct tc_action_net {
-- 
cgit v1.2.3


From 8cbfe939abe905280279e84a297b1cb34e0d0ec9 Mon Sep 17 00:00:00 2001
From: Baowen Zheng <baowen.zheng@corigine.com>
Date: Fri, 17 Dec 2021 19:16:22 +0100
Subject: flow_offload: allow user to offload tc action to net device

Use flow_indr_dev_register/flow_indr_dev_setup_offload to
offload tc action.

We need to call tc_cleanup_flow_action to clean up tc action entry since
in tc_setup_action, some actions may hold dev refcnt, especially the mirror
action.

Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h  |  1 +
 include/net/flow_offload.h | 17 +++++++++++++++++
 include/net/pkt_cls.h      |  5 +++++
 3 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a419718612c6..8b0bdeb4734e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -920,6 +920,7 @@ enum tc_setup_type {
 	TC_SETUP_QDISC_TBF,
 	TC_SETUP_QDISC_FIFO,
 	TC_SETUP_QDISC_HTB,
+	TC_SETUP_ACT,
 };
 
 /* These structures hold the attributes of bpf state that are being passed
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 2271da5aa8ee..5b8c54eb7a6b 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -551,6 +551,23 @@ struct flow_cls_offload {
 	u32 classid;
 };
 
+enum offload_act_command  {
+	FLOW_ACT_REPLACE,
+	FLOW_ACT_DESTROY,
+	FLOW_ACT_STATS,
+};
+
+struct flow_offload_action {
+	struct netlink_ext_ack *extack; /* NULL in FLOW_ACT_STATS process*/
+	enum offload_act_command  command;
+	enum flow_action_id id;
+	u32 index;
+	struct flow_stats stats;
+	struct flow_action action;
+};
+
+struct flow_offload_action *offload_action_alloc(unsigned int num_actions);
+
 static inline struct flow_rule *
 flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd)
 {
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 5d4ff76d37e2..1bfb616ea759 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -262,6 +262,9 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts)
 	for (; 0; (void)(i), (void)(a), (void)(exts))
 #endif
 
+#define tcf_act_for_each_action(i, a, actions) \
+	for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = actions[i]); i++)
+
 static inline void
 tcf_exts_stats_update(const struct tcf_exts *exts,
 		      u64 bytes, u64 packets, u64 drops, u64 lastuse,
@@ -539,6 +542,8 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
 int tc_setup_offload_action(struct flow_action *flow_action,
 			    const struct tcf_exts *exts);
 void tc_cleanup_offload_action(struct flow_action *flow_action);
+int tc_setup_action(struct flow_action *flow_action,
+		    struct tc_action *actions[]);
 
 int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
 		     void *type_data, bool err_stop, bool rtnl_held);
-- 
cgit v1.2.3


From 7adc576512110ef32b0424a727ee1d04359fc205 Mon Sep 17 00:00:00 2001
From: Baowen Zheng <baowen.zheng@corigine.com>
Date: Fri, 17 Dec 2021 19:16:23 +0100
Subject: flow_offload: add skip_hw and skip_sw to control if offload the
 action

We add skip_hw and skip_sw for user to control if offload the action
to hardware.

We also add in_hw_count for user to indicate if the action is offloaded
to any hardware.

Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h        | 1 +
 include/uapi/linux/pkt_cls.h | 9 ++++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index b418bb0e44e0..15c6a881817d 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -44,6 +44,7 @@ struct tc_action {
 	u8			hw_stats;
 	u8			used_hw_stats;
 	bool			used_hw_stats_valid;
+	u32			in_hw_count;
 };
 #define tcf_index	common.tcfa_index
 #define tcf_refcnt	common.tcfa_refcnt
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 6836ccb9c45d..ee38b35c3f57 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -19,13 +19,16 @@ enum {
 	TCA_ACT_FLAGS,
 	TCA_ACT_HW_STATS,
 	TCA_ACT_USED_HW_STATS,
+	TCA_ACT_IN_HW_COUNT,
 	__TCA_ACT_MAX
 };
 
 /* See other TCA_ACT_FLAGS_ * flags in include/net/act_api.h. */
-#define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for
-					 * actions stats.
-					 */
+#define TCA_ACT_FLAGS_NO_PERCPU_STATS (1 << 0) /* Don't use percpu allocator for
+						* actions stats.
+						*/
+#define TCA_ACT_FLAGS_SKIP_HW	(1 << 1) /* don't offload action to HW */
+#define TCA_ACT_FLAGS_SKIP_SW	(1 << 2) /* don't use action in SW */
 
 /* tca HW stats type
  * When user does not pass the attribute, he does not care.
-- 
cgit v1.2.3


From bcd64368584bab38bdd095c88df702fb64271694 Mon Sep 17 00:00:00 2001
From: Baowen Zheng <baowen.zheng@corigine.com>
Date: Fri, 17 Dec 2021 19:16:24 +0100
Subject: flow_offload: rename exts stats update functions with hw

Rename exts stats update functions with hw for readability.

We make this change also to update stats from hw for an action
when it is offloaded to hw as a single action.

Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 1bfb616ea759..efdfab8eb00c 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -266,9 +266,9 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts)
 	for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = actions[i]); i++)
 
 static inline void
-tcf_exts_stats_update(const struct tcf_exts *exts,
-		      u64 bytes, u64 packets, u64 drops, u64 lastuse,
-		      u8 used_hw_stats, bool used_hw_stats_valid)
+tcf_exts_hw_stats_update(const struct tcf_exts *exts,
+			 u64 bytes, u64 packets, u64 drops, u64 lastuse,
+			 u8 used_hw_stats, bool used_hw_stats_valid)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	int i;
-- 
cgit v1.2.3


From c7a66f8d8a946edafb38150480145ab9801e4e52 Mon Sep 17 00:00:00 2001
From: Baowen Zheng <baowen.zheng@corigine.com>
Date: Fri, 17 Dec 2021 19:16:25 +0100
Subject: flow_offload: add process to update action stats from hardware

When collecting stats for actions update them using both
hardware and software counters.

Stats update process should not run in context of preempt_disable.

Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h |  1 +
 include/net/pkt_cls.h | 18 ++++++++++--------
 2 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 15c6a881817d..20104dfdd57c 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -253,6 +253,7 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets,
 			     u64 drops, bool hw);
 int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
 
+int tcf_action_update_hw_stats(struct tc_action *action);
 int tcf_action_check_ctrlact(int action, struct tcf_proto *tp,
 			     struct tcf_chain **handle,
 			     struct netlink_ext_ack *newchain);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index efdfab8eb00c..337a3ebb4666 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -273,18 +273,20 @@ tcf_exts_hw_stats_update(const struct tcf_exts *exts,
 #ifdef CONFIG_NET_CLS_ACT
 	int i;
 
-	preempt_disable();
-
 	for (i = 0; i < exts->nr_actions; i++) {
 		struct tc_action *a = exts->actions[i];
 
-		tcf_action_stats_update(a, bytes, packets, drops,
-					lastuse, true);
-		a->used_hw_stats = used_hw_stats;
-		a->used_hw_stats_valid = used_hw_stats_valid;
-	}
+		/* if stats from hw, just skip */
+		if (tcf_action_update_hw_stats(a)) {
+			preempt_disable();
+			tcf_action_stats_update(a, bytes, packets, drops,
+						lastuse, true);
+			preempt_enable();
 
-	preempt_enable();
+			a->used_hw_stats = used_hw_stats;
+			a->used_hw_stats_valid = used_hw_stats_valid;
+		}
+	}
 #endif
 }
 
-- 
cgit v1.2.3


From 13926d19a11e303f12571df61b7bb64f17cb4561 Mon Sep 17 00:00:00 2001
From: Baowen Zheng <baowen.zheng@corigine.com>
Date: Fri, 17 Dec 2021 19:16:27 +0100
Subject: flow_offload: add reoffload process to update hw_count

Add reoffload process to update hw_count when driver
is inserted or removed.

We will delete the action if it is with skip_sw flag and
not offloaded to any hardware in reoffload process.

When reoffloading actions, we still offload the actions
that are added independent of filters.

Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 20104dfdd57c..0f5f69deb3ce 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -7,6 +7,7 @@
 */
 
 #include <linux/refcount.h>
+#include <net/flow_offload.h>
 #include <net/sch_generic.h>
 #include <net/pkt_sched.h>
 #include <net/net_namespace.h>
@@ -254,6 +255,8 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets,
 int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
 
 int tcf_action_update_hw_stats(struct tc_action *action);
+int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb,
+			    void *cb_priv, bool add);
 int tcf_action_check_ctrlact(int action, struct tcf_proto *tp,
 			     struct tcf_chain **handle,
 			     struct netlink_ext_ack *newchain);
@@ -265,6 +268,14 @@ DECLARE_STATIC_KEY_FALSE(tcf_frag_xmit_count);
 #endif
 
 int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb));
+
+#else /* !CONFIG_NET_CLS_ACT */
+
+static inline int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb,
+					  void *cb_priv, bool add) {
+	return 0;
+}
+
 #endif /* CONFIG_NET_CLS_ACT */
 
 static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
-- 
cgit v1.2.3


From c86e0209dc7725c91583e3c0c78c3da6a28daeb4 Mon Sep 17 00:00:00 2001
From: Baowen Zheng <baowen.zheng@corigine.com>
Date: Fri, 17 Dec 2021 19:16:28 +0100
Subject: flow_offload: validate flags of filter and actions

Add process to validate flags of filter and actions when adding
a tc filter.

We need to prevent adding filter with flags conflicts with its actions.

Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 2 +-
 include/net/pkt_cls.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 0f5f69deb3ce..3049cb69c025 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -203,7 +203,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est,
 		    struct tc_action *actions[], int init_res[], size_t *attr_size,
-		    u32 flags, struct netlink_ext_ack *extack);
+		    u32 flags, u32 fl_flags, struct netlink_ext_ack *extack);
 struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
 					 bool rtnl_held,
 					 struct netlink_ext_ack *extack);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 337a3ebb4666..ebef45e821af 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -330,6 +330,9 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
 		      struct nlattr **tb, struct nlattr *rate_tlv,
 		      struct tcf_exts *exts, u32 flags,
 		      struct netlink_ext_ack *extack);
+int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
+			 struct nlattr *rate_tlv, struct tcf_exts *exts,
+			 u32 flags, u32 fl_flags, struct netlink_ext_ack *extack);
 void tcf_exts_destroy(struct tcf_exts *exts);
 void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
 int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
-- 
cgit v1.2.3


From a949f2cf1ab9b3afd894427a64fce24fd8bae0a6 Mon Sep 17 00:00:00 2001
From: Sam Protsenko <semen.protsenko@linaro.org>
Date: Fri, 17 Dec 2021 18:15:43 +0200
Subject: dt-bindings: clock: Add bindings for Exynos850 sysreg clocks

System Register is used to configure system behavior, like USI protocol,
etc. SYSREG clocks should be provided to corresponding syscon nodes, to
make it possible to modify SYSREG registers.

While at it, add also missing PMU and GPIO clocks, which looks necessary
and might be needed for corresponding Exynos850 features soon.

Signed-off-by: Sam Protsenko <semen.protsenko@linaro.org>
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Link: https://lore.kernel.org/r/20211217161549.24836-2-semen.protsenko@linaro.org
---
 include/dt-bindings/clock/exynos850.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/exynos850.h b/include/dt-bindings/clock/exynos850.h
index 8aa5e82af0d3..0b6a3c6a7c90 100644
--- a/include/dt-bindings/clock/exynos850.h
+++ b/include/dt-bindings/clock/exynos850.h
@@ -82,7 +82,10 @@
 #define CLK_GOUT_I3C_PCLK		19
 #define CLK_GOUT_I3C_SCLK		20
 #define CLK_GOUT_SPEEDY_PCLK		21
-#define APM_NR_CLK			22
+#define CLK_GOUT_GPIO_ALIVE_PCLK	22
+#define CLK_GOUT_PMU_ALIVE_PCLK		23
+#define CLK_GOUT_SYSREG_APM_PCLK	24
+#define APM_NR_CLK			25
 
 /* CMU_CMGP */
 #define CLK_RCO_CMGP			1
@@ -99,7 +102,8 @@
 #define CLK_GOUT_CMGP_USI0_PCLK		12
 #define CLK_GOUT_CMGP_USI1_IPCLK	13
 #define CLK_GOUT_CMGP_USI1_PCLK		14
-#define CMGP_NR_CLK			15
+#define CLK_GOUT_SYSREG_CMGP_PCLK	15
+#define CMGP_NR_CLK			16
 
 /* CMU_HSI */
 #define CLK_MOUT_HSI_BUS_USER		1
@@ -167,7 +171,9 @@
 #define CLK_GOUT_MMC_EMBD_SDCLKIN	10
 #define CLK_GOUT_SSS_ACLK		11
 #define CLK_GOUT_SSS_PCLK		12
-#define CORE_NR_CLK			13
+#define CLK_GOUT_GPIO_CORE_PCLK		13
+#define CLK_GOUT_SYSREG_CORE_PCLK	14
+#define CORE_NR_CLK			15
 
 /* CMU_DPU */
 #define CLK_MOUT_DPU_USER		1
-- 
cgit v1.2.3


From 591020a516720e9eba1c4b1748cb73b6748e445f Mon Sep 17 00:00:00 2001
From: David Virag <virag.david003@gmail.com>
Date: Mon, 6 Dec 2021 16:31:15 +0100
Subject: dt-bindings: clock: Add bindings definitions for Exynos7885 CMU

Just like on Exynos850, the clock controller driver is designed to have
separate instances for each particular CMU, so clock IDs start from 1
for each CMU in this bindings header too.

Signed-off-by: David Virag <virag.david003@gmail.com>
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Sam Protsenko <semen.protsenko@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20211206153124.427102-2-virag.david003@gmail.com
---
 include/dt-bindings/clock/exynos7885.h | 115 +++++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 include/dt-bindings/clock/exynos7885.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/exynos7885.h b/include/dt-bindings/clock/exynos7885.h
new file mode 100644
index 000000000000..1f8701691d62
--- /dev/null
+++ b/include/dt-bindings/clock/exynos7885.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2021 Dávid Virág
+ *
+ * Device Tree binding constants for Exynos7885 clock controller.
+ */
+
+#ifndef _DT_BINDINGS_CLOCK_EXYNOS_7885_H
+#define _DT_BINDINGS_CLOCK_EXYNOS_7885_H
+
+/* CMU_TOP */
+#define CLK_FOUT_SHARED0_PLL		1
+#define CLK_FOUT_SHARED1_PLL		2
+#define CLK_DOUT_SHARED0_DIV2		3
+#define CLK_DOUT_SHARED0_DIV3		4
+#define CLK_DOUT_SHARED0_DIV4		5
+#define CLK_DOUT_SHARED0_DIV5		6
+#define CLK_DOUT_SHARED1_DIV2		7
+#define CLK_DOUT_SHARED1_DIV3		8
+#define CLK_DOUT_SHARED1_DIV4		9
+#define CLK_MOUT_CORE_BUS		10
+#define CLK_MOUT_CORE_CCI		11
+#define CLK_MOUT_CORE_G3D		12
+#define CLK_DOUT_CORE_BUS		13
+#define CLK_DOUT_CORE_CCI		14
+#define CLK_DOUT_CORE_G3D		15
+#define CLK_GOUT_CORE_BUS		16
+#define CLK_GOUT_CORE_CCI		17
+#define CLK_GOUT_CORE_G3D		18
+#define CLK_MOUT_PERI_BUS		19
+#define CLK_MOUT_PERI_SPI0		20
+#define CLK_MOUT_PERI_SPI1		21
+#define CLK_MOUT_PERI_UART0		22
+#define CLK_MOUT_PERI_UART1		23
+#define CLK_MOUT_PERI_UART2		24
+#define CLK_MOUT_PERI_USI0		25
+#define CLK_MOUT_PERI_USI1		26
+#define CLK_MOUT_PERI_USI2		27
+#define CLK_DOUT_PERI_BUS		28
+#define CLK_DOUT_PERI_SPI0		29
+#define CLK_DOUT_PERI_SPI1		30
+#define CLK_DOUT_PERI_UART0		31
+#define CLK_DOUT_PERI_UART1		32
+#define CLK_DOUT_PERI_UART2		33
+#define CLK_DOUT_PERI_USI0		34
+#define CLK_DOUT_PERI_USI1		35
+#define CLK_DOUT_PERI_USI2		36
+#define CLK_GOUT_PERI_BUS		37
+#define CLK_GOUT_PERI_SPI0		38
+#define CLK_GOUT_PERI_SPI1		39
+#define CLK_GOUT_PERI_UART0		40
+#define CLK_GOUT_PERI_UART1		41
+#define CLK_GOUT_PERI_UART2		42
+#define CLK_GOUT_PERI_USI0		43
+#define CLK_GOUT_PERI_USI1		44
+#define CLK_GOUT_PERI_USI2		45
+#define TOP_NR_CLK			46
+
+/* CMU_CORE */
+#define CLK_MOUT_CORE_BUS_USER		1
+#define CLK_MOUT_CORE_CCI_USER		2
+#define CLK_MOUT_CORE_G3D_USER		3
+#define CLK_MOUT_CORE_GIC		4
+#define CLK_DOUT_CORE_BUSP		5
+#define CLK_GOUT_CCI_ACLK		6
+#define CLK_GOUT_GIC400_CLK		7
+#define CORE_NR_CLK			8
+
+/* CMU_PERI */
+#define CLK_MOUT_PERI_BUS_USER		1
+#define CLK_MOUT_PERI_SPI0_USER		2
+#define CLK_MOUT_PERI_SPI1_USER		3
+#define CLK_MOUT_PERI_UART0_USER	4
+#define CLK_MOUT_PERI_UART1_USER	5
+#define CLK_MOUT_PERI_UART2_USER	6
+#define CLK_MOUT_PERI_USI0_USER		7
+#define CLK_MOUT_PERI_USI1_USER		8
+#define CLK_MOUT_PERI_USI2_USER		9
+#define CLK_GOUT_GPIO_TOP_PCLK		10
+#define CLK_GOUT_HSI2C0_PCLK		11
+#define CLK_GOUT_HSI2C1_PCLK		12
+#define CLK_GOUT_HSI2C2_PCLK		13
+#define CLK_GOUT_HSI2C3_PCLK		14
+#define CLK_GOUT_I2C0_PCLK		15
+#define CLK_GOUT_I2C1_PCLK		16
+#define CLK_GOUT_I2C2_PCLK		17
+#define CLK_GOUT_I2C3_PCLK		18
+#define CLK_GOUT_I2C4_PCLK		19
+#define CLK_GOUT_I2C5_PCLK		20
+#define CLK_GOUT_I2C6_PCLK		21
+#define CLK_GOUT_I2C7_PCLK		22
+#define CLK_GOUT_PWM_MOTOR_PCLK		23
+#define CLK_GOUT_SPI0_PCLK		24
+#define CLK_GOUT_SPI0_EXT_CLK		25
+#define CLK_GOUT_SPI1_PCLK		26
+#define CLK_GOUT_SPI1_EXT_CLK		27
+#define CLK_GOUT_UART0_EXT_UCLK		28
+#define CLK_GOUT_UART0_PCLK		29
+#define CLK_GOUT_UART1_EXT_UCLK		30
+#define CLK_GOUT_UART1_PCLK		31
+#define CLK_GOUT_UART2_EXT_UCLK		32
+#define CLK_GOUT_UART2_PCLK		33
+#define CLK_GOUT_USI0_PCLK		34
+#define CLK_GOUT_USI0_SCLK		35
+#define CLK_GOUT_USI1_PCLK		36
+#define CLK_GOUT_USI1_SCLK		37
+#define CLK_GOUT_USI2_PCLK		38
+#define CLK_GOUT_USI2_SCLK		39
+#define CLK_GOUT_MCT_PCLK		40
+#define CLK_GOUT_SYSREG_PERI_PCLK	41
+#define CLK_GOUT_WDT0_PCLK		42
+#define CLK_GOUT_WDT1_PCLK		43
+#define PERI_NR_CLK			44
+
+#endif /* _DT_BINDINGS_CLOCK_EXYNOS_7885_H */
-- 
cgit v1.2.3


From 7d4203c13435c0bdae61bf16bbd0408d5b958ade Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 25 Nov 2021 18:15:37 +0100
Subject: mm: add virt_to_folio() and folio_address()

These two wrappers around their respective struct page variants will be
useful in the following patches.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Roman Gushchin <guro@fb.com>
---
 include/linux/mm.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a7e4a9e7d807..4a6cf22483da 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -863,6 +863,13 @@ static inline struct page *virt_to_head_page(const void *x)
 	return compound_head(page);
 }
 
+static inline struct folio *virt_to_folio(const void *x)
+{
+	struct page *page = virt_to_page(x);
+
+	return page_folio(page);
+}
+
 void __put_page(struct page *page);
 
 void put_pages_list(struct list_head *pages);
@@ -1753,6 +1760,11 @@ void page_address_init(void);
 #define page_address_init()  do { } while(0)
 #endif
 
+static inline void *folio_address(const struct folio *folio)
+{
+	return page_address(&folio->page);
+}
+
 extern void *page_rmapping(struct page *page);
 extern struct anon_vma *page_anon_vma(struct page *page);
 extern pgoff_t __page_file_index(struct page *page);
-- 
cgit v1.2.3


From d5c383f2c98ac58c210b266cdaf7b86bc32d1ad1 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 17 Dec 2021 15:30:56 +0000
Subject: iommu/iova: Squash entry_dtor abstraction

All flush queues are driven by iommu-dma now, so there is no need to
abstract entry_dtor or its data any more. Squash the now-canonical
implementation directly into the IOVA code to get it out of the way.

Reviewed-by: John Garry <john.garry@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/2260f8de00ab5e0f9d2a1cf8978e6ae7cd4f182c.1639753638.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iova.h | 26 +++-----------------------
 1 file changed, 3 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/iova.h b/include/linux/iova.h
index 71d8a2de6635..e746d8e41449 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -40,9 +40,6 @@ struct iova_domain;
 /* Call-Back from IOVA code into IOMMU drivers */
 typedef void (* iova_flush_cb)(struct iova_domain *domain);
 
-/* Destructor for per-entry data */
-typedef void (* iova_entry_dtor)(unsigned long data);
-
 /* Number of entries per Flush Queue */
 #define IOVA_FQ_SIZE	256
 
@@ -53,7 +50,7 @@ typedef void (* iova_entry_dtor)(unsigned long data);
 struct iova_fq_entry {
 	unsigned long iova_pfn;
 	unsigned long pages;
-	unsigned long data;
+	struct page *freelist;
 	u64 counter; /* Flush counter when this entrie was added */
 };
 
@@ -88,9 +85,6 @@ struct iova_domain {
 	iova_flush_cb	flush_cb;	/* Call-Back function to flush IOMMU
 					   TLBs */
 
-	iova_entry_dtor entry_dtor;	/* IOMMU driver specific destructor for
-					   iova entry */
-
 	struct timer_list fq_timer;		/* Timer to regularily empty the
 						   flush-queues */
 	atomic_t fq_timer_on;			/* 1 when timer is active, 0
@@ -146,15 +140,14 @@ void free_iova_fast(struct iova_domain *iovad, unsigned long pfn,
 		    unsigned long size);
 void queue_iova(struct iova_domain *iovad,
 		unsigned long pfn, unsigned long pages,
-		unsigned long data);
+		struct page *freelist);
 unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
 			      unsigned long limit_pfn, bool flush_rcache);
 struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
 	unsigned long pfn_hi);
 void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 	unsigned long start_pfn);
-int init_iova_flush_queue(struct iova_domain *iovad,
-			  iova_flush_cb flush_cb, iova_entry_dtor entry_dtor);
+int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb);
 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
 void put_iova_domain(struct iova_domain *iovad);
 #else
@@ -189,12 +182,6 @@ static inline void free_iova_fast(struct iova_domain *iovad,
 {
 }
 
-static inline void queue_iova(struct iova_domain *iovad,
-			      unsigned long pfn, unsigned long pages,
-			      unsigned long data)
-{
-}
-
 static inline unsigned long alloc_iova_fast(struct iova_domain *iovad,
 					    unsigned long size,
 					    unsigned long limit_pfn,
@@ -216,13 +203,6 @@ static inline void init_iova_domain(struct iova_domain *iovad,
 {
 }
 
-static inline int init_iova_flush_queue(struct iova_domain *iovad,
-					iova_flush_cb flush_cb,
-					iova_entry_dtor entry_dtor)
-{
-	return -ENODEV;
-}
-
 static inline struct iova *find_iova(struct iova_domain *iovad,
 				     unsigned long pfn)
 {
-- 
cgit v1.2.3


From 649ad9835a3783bcb6c69368fa939e0010abb2c6 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 17 Dec 2021 15:30:57 +0000
Subject: iommu/iova: Squash flush_cb abstraction

Once again, with iommu-dma now being the only flush queue user, we no
longer need the extra level of indirection through flush_cb. Squash that
and let the flush queue code call the domain method directly. This does
mean temporarily having to carry an additional copy of the IOMMU domain
pointer around instead, but only until a later patch untangles it again.

Reviewed-by: John Garry <john.garry@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/e3f9b4acdd6640012ef4fbc819ac868d727b64a9.1639753638.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iova.h | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/iova.h b/include/linux/iova.h
index e746d8e41449..99be4fcea4f3 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -14,6 +14,7 @@
 #include <linux/rbtree.h>
 #include <linux/atomic.h>
 #include <linux/dma-mapping.h>
+#include <linux/iommu.h>
 
 /* iova structure */
 struct iova {
@@ -35,11 +36,6 @@ struct iova_rcache {
 	struct iova_cpu_rcache __percpu *cpu_rcaches;
 };
 
-struct iova_domain;
-
-/* Call-Back from IOVA code into IOMMU drivers */
-typedef void (* iova_flush_cb)(struct iova_domain *domain);
-
 /* Number of entries per Flush Queue */
 #define IOVA_FQ_SIZE	256
 
@@ -82,8 +78,7 @@ struct iova_domain {
 	struct iova	anchor;		/* rbtree lookup anchor */
 	struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE];	/* IOVA range caches */
 
-	iova_flush_cb	flush_cb;	/* Call-Back function to flush IOMMU
-					   TLBs */
+	struct iommu_domain *fq_domain;
 
 	struct timer_list fq_timer;		/* Timer to regularily empty the
 						   flush-queues */
@@ -147,7 +142,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
 	unsigned long pfn_hi);
 void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 	unsigned long start_pfn);
-int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb);
+int init_iova_flush_queue(struct iova_domain *iovad, struct iommu_domain *fq_domain);
 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
 void put_iova_domain(struct iova_domain *iovad);
 #else
-- 
cgit v1.2.3


From 87f60cc65d24939353b40aa1d9297fea080cdf8d Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 17 Dec 2021 15:31:00 +0000
Subject: iommu/vt-d: Use put_pages_list

page->freelist is for the use of slab.  We already have the ability
to free a list of pages in the core mm, but it requires the use of a
list_head and for the pages to be chained together through page->lru.
Switch the Intel IOMMU and IOVA code over to using free_pages_list().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
[rm: split from original patch, cosmetic tweaks, fix fq entries]
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/2115b560d9a0ce7cd4b948bd51a2b7bde8fdfd59.1639753638.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 3 ++-
 include/linux/iova.h  | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index d2f3435e7d17..de0c57a567c8 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -186,7 +186,7 @@ struct iommu_iotlb_gather {
 	unsigned long		start;
 	unsigned long		end;
 	size_t			pgsize;
-	struct page		*freelist;
+	struct list_head	freelist;
 	bool			queued;
 };
 
@@ -399,6 +399,7 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
 {
 	*gather = (struct iommu_iotlb_gather) {
 		.start	= ULONG_MAX,
+		.freelist = LIST_HEAD_INIT(gather->freelist),
 	};
 }
 
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 99be4fcea4f3..072a09c06e8a 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -46,7 +46,7 @@ struct iova_rcache {
 struct iova_fq_entry {
 	unsigned long iova_pfn;
 	unsigned long pages;
-	struct page *freelist;
+	struct list_head freelist;
 	u64 counter; /* Flush counter when this entrie was added */
 };
 
@@ -135,7 +135,7 @@ void free_iova_fast(struct iova_domain *iovad, unsigned long pfn,
 		    unsigned long size);
 void queue_iova(struct iova_domain *iovad,
 		unsigned long pfn, unsigned long pages,
-		struct page *freelist);
+		struct list_head *freelist);
 unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
 			      unsigned long limit_pfn, bool flush_rcache);
 struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
-- 
cgit v1.2.3


From a17e3026bc4da9135ca9a42ec0b1fa67f95172e3 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 17 Dec 2021 15:31:03 +0000
Subject: iommu: Move flush queue data into iommu_dma_cookie

Complete the move into iommu-dma by refactoring the flush queues
themselves to belong to the DMA cookie rather than the IOVA domain.

The refactoring may as well extend to some minor cosmetic aspects
too, to help us stay one step ahead of the style police.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/24304722005bc6f144e2a1fdd865d1465722fc2e.1639753638.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iova.h | 44 +-------------------------------------------
 1 file changed, 1 insertion(+), 43 deletions(-)

(limited to 'include')

diff --git a/include/linux/iova.h b/include/linux/iova.h
index 072a09c06e8a..0abd48c5e622 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -12,9 +12,6 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/rbtree.h>
-#include <linux/atomic.h>
-#include <linux/dma-mapping.h>
-#include <linux/iommu.h>
 
 /* iova structure */
 struct iova {
@@ -36,27 +33,6 @@ struct iova_rcache {
 	struct iova_cpu_rcache __percpu *cpu_rcaches;
 };
 
-/* Number of entries per Flush Queue */
-#define IOVA_FQ_SIZE	256
-
-/* Timeout (in ms) after which entries are flushed from the Flush-Queue */
-#define IOVA_FQ_TIMEOUT	10
-
-/* Flush Queue entry for defered flushing */
-struct iova_fq_entry {
-	unsigned long iova_pfn;
-	unsigned long pages;
-	struct list_head freelist;
-	u64 counter; /* Flush counter when this entrie was added */
-};
-
-/* Per-CPU Flush Queue structure */
-struct iova_fq {
-	struct iova_fq_entry entries[IOVA_FQ_SIZE];
-	unsigned head, tail;
-	spinlock_t lock;
-};
-
 /* holds all the iova translations for a domain */
 struct iova_domain {
 	spinlock_t	iova_rbtree_lock; /* Lock to protect update of rbtree */
@@ -67,23 +43,9 @@ struct iova_domain {
 	unsigned long	start_pfn;	/* Lower limit for this domain */
 	unsigned long	dma_32bit_pfn;
 	unsigned long	max32_alloc_size; /* Size of last failed allocation */
-	struct iova_fq __percpu *fq;	/* Flush Queue */
-
-	atomic64_t	fq_flush_start_cnt;	/* Number of TLB flushes that
-						   have been started */
-
-	atomic64_t	fq_flush_finish_cnt;	/* Number of TLB flushes that
-						   have been finished */
-
 	struct iova	anchor;		/* rbtree lookup anchor */
-	struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE];	/* IOVA range caches */
-
-	struct iommu_domain *fq_domain;
 
-	struct timer_list fq_timer;		/* Timer to regularily empty the
-						   flush-queues */
-	atomic_t fq_timer_on;			/* 1 when timer is active, 0
-						   when not */
+	struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE];	/* IOVA range caches */
 	struct hlist_node	cpuhp_dead;
 };
 
@@ -133,16 +95,12 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
 	bool size_aligned);
 void free_iova_fast(struct iova_domain *iovad, unsigned long pfn,
 		    unsigned long size);
-void queue_iova(struct iova_domain *iovad,
-		unsigned long pfn, unsigned long pages,
-		struct list_head *freelist);
 unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
 			      unsigned long limit_pfn, bool flush_rcache);
 struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
 	unsigned long pfn_hi);
 void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 	unsigned long start_pfn);
-int init_iova_flush_queue(struct iova_domain *iovad, struct iommu_domain *fq_domain);
 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
 void put_iova_domain(struct iova_domain *iovad);
 #else
-- 
cgit v1.2.3


From 51b1a5729469cef57a3c97aa014aa6e1d2b8d864 Mon Sep 17 00:00:00 2001
From: Sam Protsenko <semen.protsenko@linaro.org>
Date: Fri, 17 Dec 2021 18:15:47 +0200
Subject: dt-bindings: pinctrl: samsung: Add pin drive definitions for
 Exynos850

All Exynos850 GPIO blocks can use EXYNOS5420_PIN_DRV* definitions,
except GPIO_HSI block. Add pin drive strength definitions for GPIO_HSI
block correspondingly.

Signed-off-by: Sam Protsenko <semen.protsenko@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20211217161549.24836-6-semen.protsenko@linaro.org
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
---
 include/dt-bindings/pinctrl/samsung.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/dt-bindings/pinctrl/samsung.h b/include/dt-bindings/pinctrl/samsung.h
index b1832506b923..950970634dfe 100644
--- a/include/dt-bindings/pinctrl/samsung.h
+++ b/include/dt-bindings/pinctrl/samsung.h
@@ -36,7 +36,10 @@
 #define EXYNOS5260_PIN_DRV_LV4		2
 #define EXYNOS5260_PIN_DRV_LV6		3
 
-/* Drive strengths for Exynos5410, Exynos542x and Exynos5800 */
+/*
+ * Drive strengths for Exynos5410, Exynos542x, Exynos5800 and Exynos850 (except
+ * GPIO_HSI block)
+ */
 #define EXYNOS5420_PIN_DRV_LV1		0
 #define EXYNOS5420_PIN_DRV_LV2		1
 #define EXYNOS5420_PIN_DRV_LV3		2
@@ -56,6 +59,14 @@
 #define EXYNOS5433_PIN_DRV_SLOW_SR5	0xc
 #define EXYNOS5433_PIN_DRV_SLOW_SR6	0xf
 
+/* Drive strengths for Exynos850 GPIO_HSI block */
+#define EXYNOS850_HSI_PIN_DRV_LV1	0	/* 1x   */
+#define EXYNOS850_HSI_PIN_DRV_LV1_5	1	/* 1.5x */
+#define EXYNOS850_HSI_PIN_DRV_LV2	2	/* 2x   */
+#define EXYNOS850_HSI_PIN_DRV_LV2_5	3	/* 2.5x */
+#define EXYNOS850_HSI_PIN_DRV_LV3	4	/* 3x   */
+#define EXYNOS850_HSI_PIN_DRV_LV4	5	/* 4x   */
+
 #define EXYNOS_PIN_FUNC_INPUT		0
 #define EXYNOS_PIN_FUNC_OUTPUT		1
 #define EXYNOS_PIN_FUNC_2		2
-- 
cgit v1.2.3


From 28f350a67d291575492057c92ceb8518ecbace95 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Mon, 29 Nov 2021 15:32:41 +0200
Subject: cfg80211: Fix order of enum nl80211_band_iftype_attr documentation

And fix the comment.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20211129152938.4ef43aff0c5d.I96dcb743bcd4f387ba4cfaa61987aeb642ad762b@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 3e734826792f..bf69ecbc1c24 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3747,13 +3747,12 @@ enum nl80211_mpath_info {
  *     capabilities IE
  * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE: HE PPE thresholds information as
  *     defined in HE capabilities IE
- * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band HE capability attribute currently
- *     defined
  * @NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA: HE 6GHz band capabilities (__le16),
  *	given for all 6 GHz band channels
  * @NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS: vendor element capabilities that are
  *	advertised on this band/for this iftype (binary)
  * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use
+ * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band attribute currently defined
  */
 enum nl80211_band_iftype_attr {
 	__NL80211_BAND_IFTYPE_ATTR_INVALID,
-- 
cgit v1.2.3


From 6d501764288cf7869c7f54f1fcabd77bcd91b90e Mon Sep 17 00:00:00 2001
From: Nathan Errera <nathan.errera@intel.com>
Date: Mon, 29 Nov 2021 15:32:38 +0200
Subject: mac80211: introduce channel switch disconnect function

Introduce a disconnect function that can be used when a
channel switch error occurs. The channel switch can request to
block the tx, and so, we need to make sure we do not send a deauth
frame in this case.

Signed-off-by: Nathan Errera <nathan.errera@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20211129152938.cd2a615a0702.I9edb14785586344af17644b610ab5be109dcef00@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e349f57d19ae..28e66cdae3ec 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6075,6 +6075,18 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw);
  */
 void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success);
 
+/**
+ * ieee80211_channel_switch_disconnect - disconnect due to channel switch error
+ * @vif &struct ieee80211_vif pointer from the add_interface callback.
+ * @block_tx: if %true, do not send deauth frame.
+ *
+ * Instruct mac80211 to disconnect due to a channel switch error. The channel
+ * switch can request to block the tx and so, we need to make sure we do not send
+ * a deauth frame in this case.
+ */
+void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif,
+					 bool block_tx);
+
 /**
  * ieee80211_request_smps - request SM PS transition
  * @vif: &struct ieee80211_vif pointer from the add_interface callback.
-- 
cgit v1.2.3


From a083ee8a4e03348fb90a4b24cbe957b3252c7b04 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Mon, 29 Nov 2021 15:32:34 +0200
Subject: cfg80211: Add support for notifying association comeback

Thought the underline driver MLME can handle association temporal
rejection with comeback, it is still useful to notify this to
user space, as user space might want to handle the temporal
rejection differently. For example, in case the comeback time
is too long, user space can deauthenticate immediately and try
to associate with a different AP.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20211129152938.2467809e8cb3.I45574185b582666bc78eef0c29a4c36b478e5382@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 12 ++++++++++++
 include/uapi/linux/nl80211.h |  7 +++++++
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index a887086cb103..c9a9073d4c2a 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -8287,6 +8287,18 @@ bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype,
 			     bool is_4addr, u8 check_swif);
 
 
+/**
+ * cfg80211_assoc_comeback - notification of association that was
+ * temporarly rejected with a comeback
+ * @netdev: network device
+ * @bss: the bss entry with which association is in progress.
+ * @timeout: timeout interval value TUs.
+ *
+ * this function may sleep. the caller must hold the corresponding wdev's mutex.
+ */
+void cfg80211_assoc_comeback(struct net_device *netdev,
+			     struct cfg80211_bss *bss, u32 timeout);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index bf69ecbc1c24..a8e20d25252b 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1232,6 +1232,11 @@
  *	&NL80211_ATTR_FILS_NONCES - for FILS Nonces
  *		(STA Nonce 16 bytes followed by AP Nonce 16 bytes)
  *
+ * @NL80211_CMD_ASSOC_COMEBACK: notification about an association
+ *      temporal rejection with comeback. The event includes %NL80211_ATTR_MAC
+ *      to describe the BSSID address of the AP and %NL80211_ATTR_TIMEOUT to
+ *      specify the timeout value.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -1474,6 +1479,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_SET_FILS_AAD,
 
+	NL80211_CMD_ASSOC_COMEBACK,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
-- 
cgit v1.2.3


From a95bfb876fa87e2d0fa718ee61a8030ddf162d2b Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 29 Nov 2021 14:11:24 +0100
Subject: cfg80211: rename offchannel_chain structs to background_chain to
 avoid confusion with ETSI standard

ETSI standard defines "Offchannel CAC" as:
"Off-Channel CAC is performed by a number of non-continuous checks
spread over a period in time. This period, which is required to
determine the presence of radar signals, is defined as the Off-Channel
CAC Time..
Minimum Off-Channel CAC Time 6 minutes and Maximum Off-Channel CAC Time
4 hours..".
mac80211 implementation refers to a dedicated hw chain used for continuous
radar monitoring. Rename offchannel_* references to background_* in
order to avoid confusion with ETSI standard.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/4204cc1d648d76b44557981713231e030a3bd991.1638190762.git.lorenzo@kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 20 ++++++++++----------
 include/net/mac80211.h       | 10 +++++-----
 include/uapi/linux/nl80211.h | 14 +++++++-------
 3 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c9a9073d4c2a..e29d904eccff 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4073,14 +4073,14 @@ struct mgmt_frame_regs {
  *	those to decrypt (Re)Association Request and encrypt (Re)Association
  *	Response frame.
  *
- * @set_radar_offchan: Configure dedicated offchannel chain available for
+ * @set_radar_background: Configure dedicated offchannel chain available for
  *	radar/CAC detection on some hw. This chain can't be used to transmit
  *	or receive frames and it is bounded to a running wdev.
- *	Offchannel radar/CAC detection allows to avoid the CAC downtime
+ *	Background radar/CAC detection allows to avoid the CAC downtime
  *	switching to a different channel during CAC detection on the selected
  *	radar channel.
  *	The caller is expected to set chandef pointer to NULL in order to
- *	disable offchannel CAC/radar detection.
+ *	disable background CAC/radar detection.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -4413,8 +4413,8 @@ struct cfg80211_ops {
 				struct cfg80211_color_change_settings *params);
 	int     (*set_fils_aad)(struct wiphy *wiphy, struct net_device *dev,
 				struct cfg80211_fils_aad *fils_aad);
-	int	(*set_radar_offchan)(struct wiphy *wiphy,
-				     struct cfg80211_chan_def *chandef);
+	int	(*set_radar_background)(struct wiphy *wiphy,
+					struct cfg80211_chan_def *chandef);
 };
 
 /*
@@ -7626,9 +7626,9 @@ cfg80211_radar_event(struct wiphy *wiphy,
 }
 
 static inline void
-cfg80211_offchan_radar_event(struct wiphy *wiphy,
-			     struct cfg80211_chan_def *chandef,
-			     gfp_t gfp)
+cfg80211_background_radar_event(struct wiphy *wiphy,
+				struct cfg80211_chan_def *chandef,
+				gfp_t gfp)
 {
 	__cfg80211_radar_event(wiphy, chandef, true, gfp);
 }
@@ -7663,13 +7663,13 @@ void cfg80211_cac_event(struct net_device *netdev,
 			enum nl80211_radar_event event, gfp_t gfp);
 
 /**
- * cfg80211_offchan_cac_abort - Channel Availability Check offchan abort event
+ * cfg80211_background_cac_abort - Channel Availability Check offchan abort event
  * @wiphy: the wiphy
  *
  * This function is called by the driver when a Channel Availability Check
  * (CAC) is aborted by a offchannel dedicated chain.
  */
-void cfg80211_offchan_cac_abort(struct wiphy *wiphy);
+void cfg80211_background_cac_abort(struct wiphy *wiphy);
 
 /**
  * cfg80211_gtk_rekey_notify - notify userspace about driver rekeying
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 28e66cdae3ec..8907338d52b5 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3939,14 +3939,14 @@ struct ieee80211_prep_tx_info {
  *	twt structure.
  * @twt_teardown_request: Update the hw with TWT teardown request received
  *	from the peer.
- * @set_radar_offchan: Configure dedicated offchannel chain available for
+ * @set_radar_background: Configure dedicated offchannel chain available for
  *	radar/CAC detection on some hw. This chain can't be used to transmit
  *	or receive frames and it is bounded to a running wdev.
- *	Offchannel radar/CAC detection allows to avoid the CAC downtime
+ *	Background radar/CAC detection allows to avoid the CAC downtime
  *	switching to a different channel during CAC detection on the selected
  *	radar channel.
  *	The caller is expected to set chandef pointer to NULL in order to
- *	disable offchannel CAC/radar detection.
+ *	disable background CAC/radar detection.
  * @net_fill_forward_path: Called from .ndo_fill_forward_path in order to
  *	resolve a path for hardware flow offloading
  */
@@ -4277,8 +4277,8 @@ struct ieee80211_ops {
 			      struct ieee80211_twt_setup *twt);
 	void (*twt_teardown_request)(struct ieee80211_hw *hw,
 				     struct ieee80211_sta *sta, u8 flowid);
-	int (*set_radar_offchan)(struct ieee80211_hw *hw,
-				 struct cfg80211_chan_def *chandef);
+	int (*set_radar_background)(struct ieee80211_hw *hw,
+				    struct cfg80211_chan_def *chandef);
 	int (*net_fill_forward_path)(struct ieee80211_hw *hw,
 				     struct ieee80211_vif *vif,
 				     struct ieee80211_sta *sta,
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index a8e20d25252b..5ce20fb44f24 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2646,10 +2646,10 @@ enum nl80211_commands {
  *	Mandatory parameter for the transmitting interface to enable MBSSID.
  *	Optional for the non-transmitting interfaces.
  *
- * @NL80211_ATTR_RADAR_OFFCHAN: Configure dedicated offchannel chain available for
- *	radar/CAC detection on some hw. This chain can't be used to transmit
- *	or receive frames and it is bounded to a running wdev.
- *	Offchannel radar/CAC detection allows to avoid the CAC downtime
+ * @NL80211_ATTR_RADAR_BACKGROUND: Configure dedicated offchannel chain
+ *	available for radar/CAC detection on some hw. This chain can't be used
+ *	to transmit or receive frames and it is bounded to a running wdev.
+ *	Background radar/CAC detection allows to avoid the CAC downtime
  *	switching on a different channel during CAC detection on the selected
  *	radar channel.
  *
@@ -3159,7 +3159,7 @@ enum nl80211_attrs {
 	NL80211_ATTR_MBSSID_CONFIG,
 	NL80211_ATTR_MBSSID_ELEMS,
 
-	NL80211_ATTR_RADAR_OFFCHAN,
+	NL80211_ATTR_RADAR_BACKGROUND,
 
 	/* add attributes here, update the policy in nl80211.c */
 
@@ -6066,7 +6066,7 @@ enum nl80211_feature_flags {
  *	frames. Userspace has to share FILS AAD details to the driver by using
  *	@NL80211_CMD_SET_FILS_AAD.
  *
- * @NL80211_EXT_FEATURE_RADAR_OFFCHAN: Device supports offchannel radar/CAC
+ * @NL80211_EXT_FEATURE_RADAR_BACKGROUND: Device supports background radar/CAC
  *	detection.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
@@ -6135,7 +6135,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE,
 	NL80211_EXT_FEATURE_BSS_COLOR,
 	NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD,
-	NL80211_EXT_FEATURE_RADAR_OFFCHAN,
+	NL80211_EXT_FEATURE_RADAR_BACKGROUND,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
-- 
cgit v1.2.3


From 7f599aeccbd2bcba800c6c7ecc4586fd8cafc1d8 Mon Sep 17 00:00:00 2001
From: Ayala Beker <ayala.beker@intel.com>
Date: Thu, 2 Dec 2021 14:36:10 +0200
Subject: cfg80211: Use the HE operation IE to determine a 6GHz BSS channel

A non-collocated AP whose primary channel is not a PSC channel
may transmit a duplicated beacon on the corresponding PSC channel
in which it would indicate its true primary channel.
Use this inforamtion contained in the HE operation IE to determine
the primary channel of the AP.
In case of invalid infomration ignore it and use the channel
the frame was received on.

Signed-off-by: Ayala Beker <ayala.beker@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20211202143322.71eb2176e54e.I130f678e4aa390973ab39d838bbfe7b2d54bff8e@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index e29d904eccff..b59baf9dfd67 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -6385,17 +6385,6 @@ static inline void cfg80211_gen_new_bssid(const u8 *bssid, u8 max_bssid,
 	u64_to_ether_addr(new_bssid_u64, new_bssid);
 }
 
-/**
- * cfg80211_get_ies_channel_number - returns the channel number from ies
- * @ie: IEs
- * @ielen: length of IEs
- * @band: enum nl80211_band of the channel
- *
- * Returns the channel number, or -1 if none could be determined.
- */
-int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen,
-				    enum nl80211_band band);
-
 /**
  * cfg80211_is_element_inherited - returns if element ID should be inherited
  * @element: element to check
@@ -6431,6 +6420,19 @@ enum cfg80211_bss_frame_type {
 	CFG80211_BSS_FTYPE_PRESP,
 };
 
+/**
+ * cfg80211_get_ies_channel_number - returns the channel number from ies
+ * @ie: IEs
+ * @ielen: length of IEs
+ * @band: enum nl80211_band of the channel
+ * @ftype: frame type
+ *
+ * Returns the channel number, or -1 if none could be determined.
+ */
+int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen,
+				    enum nl80211_band band,
+				    enum cfg80211_bss_frame_type ftype);
+
 /**
  * cfg80211_inform_bss_data - inform cfg80211 of a new BSS
  *
-- 
cgit v1.2.3


From 47301a74bbfa80cef876e646a8c5fec03c20fc8d Mon Sep 17 00:00:00 2001
From: Veerendranath Jakkam <vjakkam@codeaurora.org>
Date: Fri, 26 Nov 2021 12:55:18 +0530
Subject: nl80211: Add support to set AP settings flags with single attribute

In previous method each AP settings flag is represented by a top-level
flag attribute and conversion to enum cfg80211_ap_settings_flags had to
be done before sending them to driver. This commit is to make it easier
to define new AP settings flags and sending them to driver.

This commit also deprecate sending of
%NL80211_ATTR_EXTERNAL_AUTH_SUPPORT in %NL80211_CMD_START_AP. But to
maintain backwards compatibility checks for
%NL80211_ATTR_EXTERNAL_AUTH_SUPPORT in %NL80211_CMD_START_AP when
%NL80211_ATTR_AP_SETTINGS_FLAGS not present in %NL80211_CMD_START_AP.

Signed-off-by: Veerendranath Jakkam <vjakkam@codeaurora.org>
Link: https://lore.kernel.org/r/1637911519-21306-1-git-send-email-vjakkam@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 11 -----------
 include/uapi/linux/nl80211.h | 20 +++++++++++++++++++-
 2 files changed, 19 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index b59baf9dfd67..d19e48f9fdc6 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1187,17 +1187,6 @@ struct cfg80211_unsol_bcast_probe_resp {
 	const u8 *tmpl;
 };
 
-/**
- * enum cfg80211_ap_settings_flags - AP settings flags
- *
- * Used by cfg80211_ap_settings
- *
- * @AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external authentication
- */
-enum cfg80211_ap_settings_flags {
-	AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = BIT(0),
-};
-
 /**
  * struct cfg80211_ap_settings - AP configuration
  *
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 5ce20fb44f24..ab461b2be157 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2477,7 +2477,9 @@ enum nl80211_commands {
  *	space supports external authentication. This attribute shall be used
  *	with %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP request. The driver
  *	may offload authentication processing to user space if this capability
- *	is indicated in the respective requests from the user space.
+ *	is indicated in the respective requests from the user space. (This flag
+ *	attribute deprecated for %NL80211_CMD_START_AP, use
+ *	%NL80211_ATTR_AP_SETTINGS_FLAGS)
  *
  * @NL80211_ATTR_NSS: Station's New/updated  RX_NSS value notified using this
  *	u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED.
@@ -2653,6 +2655,10 @@ enum nl80211_commands {
  *	switching on a different channel during CAC detection on the selected
  *	radar channel.
  *
+ * @NL80211_ATTR_AP_SETTINGS_FLAGS: u32 attribute contains ap settings flags,
+ *	enumerated in &enum nl80211_ap_settings_flags. This attribute shall be
+ *	used with %NL80211_CMD_START_AP request.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3161,6 +3167,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_RADAR_BACKGROUND,
 
+	NL80211_ATTR_AP_SETTINGS_FLAGS,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -7481,4 +7489,14 @@ enum nl80211_mbssid_config_attributes {
 	NL80211_MBSSID_CONFIG_ATTR_MAX = __NL80211_MBSSID_CONFIG_ATTR_LAST - 1,
 };
 
+/**
+ * enum nl80211_ap_settings_flags - AP settings flags
+ *
+ * @NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external
+ *	authentication.
+ */
+enum nl80211_ap_settings_flags {
+	NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT	= 1 << 0,
+};
+
 #endif /* __LINUX_NL80211_H */
-- 
cgit v1.2.3


From 87c1aec15dee8bdb245aabbd181f9f9e1a4770ae Mon Sep 17 00:00:00 2001
From: Veerendranath Jakkam <vjakkam@codeaurora.org>
Date: Fri, 26 Nov 2021 12:55:19 +0530
Subject: nl80211: Add support to offload SA Query procedures for AP SME device

Add a flag attribute to use in ap settings to indicate userspace
supports offloading of SA Query procedures to driver. Also add AP SME
device feature flag to advertise that the SA Query procedures offloaded
to driver when userspace indicates support for offloading of SA Query
procedures.

Driver handles SA Query procedures in driver's SME it self and skip
sending SA Query request or response frames to userspace when userspace
indicates support for SA Query procedures offload. But if userspace
doesn't advertise support for SA Query procedures offload driver shall
not offload SA Query procedures handling.

Also userspace with SA Query procedures offload capability shall skip SA
Query specific validations when driver indicates support for handling SA
Query procedures.

Signed-off-by: Veerendranath Jakkam <vjakkam@codeaurora.org>
Link: https://lore.kernel.org/r/1637911519-21306-2-git-send-email-vjakkam@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ab461b2be157..d997252de986 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -5760,13 +5760,15 @@ enum nl80211_tdls_operation {
 	NL80211_TDLS_DISABLE_LINK,
 };
 
-/*
+/**
  * enum nl80211_ap_sme_features - device-integrated AP features
- * Reserved for future use, no bits are defined in
- * NL80211_ATTR_DEVICE_AP_SME yet.
+ * @NL80211_AP_SME_SA_QUERY_OFFLOAD: SA Query procedures offloaded to driver
+ *	when user space indicates support for SA Query procedures offload during
+ *	"start ap" with %NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT.
+ */
 enum nl80211_ap_sme_features {
+	NL80211_AP_SME_SA_QUERY_OFFLOAD		= 1 << 0,
 };
- */
 
 /**
  * enum nl80211_feature_flags - device/driver features
@@ -7494,9 +7496,15 @@ enum nl80211_mbssid_config_attributes {
  *
  * @NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external
  *	authentication.
+ * @NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT: Userspace supports SA Query
+ *	procedures offload to driver. If driver advertises
+ *	%NL80211_AP_SME_SA_QUERY_OFFLOAD in AP SME features, userspace shall
+ *	ignore SA Query procedures and validations when this flag is set by
+ *	userspace.
  */
 enum nl80211_ap_settings_flags {
 	NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT	= 1 << 0,
+	NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT	= 1 << 1,
 };
 
 #endif /* __LINUX_NL80211_H */
-- 
cgit v1.2.3


From d9a8297e873eda9d47aa5895ca47dc12eca0b198 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 1 Dec 2021 11:09:25 +0100
Subject: nl82011: clarify interface combinations wrt. channels

Thinking about MLD (Draft 802.11be) now, it's clear that
we'll have new limitations where different stations (or
links) must be on _different_ channels (or in different
frequency ranges even.) Clarify that the current limit of
multiple channels is a maximum and the same one is OK.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://lore.kernel.org/r/20211201110924.2816d91b6862.I2d997abd525574529f88e941d90aeb640dbb1abf@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index d997252de986..f1a9d6594df2 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -5578,7 +5578,7 @@ enum nl80211_iface_limit_attrs {
  *	=> allows 8 of AP/GO that can have BI gcd >= min gcd
  *
  *	numbers = [ #{STA} <= 2 ], channels = 2, max = 2
- *	=> allows two STAs on different channels
+ *	=> allows two STAs on the same or on different channels
  *
  *	numbers = [ #{STA} <= 1, #{P2P-client,P2P-GO} <= 3 ], max = 4
  *	=> allows a STA plus three P2P interfaces
-- 
cgit v1.2.3


From 5bc9a9dd75351023793d8aa4116ead005d659729 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Sun, 19 Dec 2021 21:51:24 +0200
Subject: rfkill: allow to get the software rfkill state

iwlwifi needs to be able to differentiate between the
software rfkill state and the hardware rfkill state.

The reason for this is that iwlwifi needs to notify any
change in the software rfkill state even when it doesn't
own the device (which means even when the hardware rfkill
is asserted).

In order to be able to know the software rfkill when the
host does not own the device, iwlwifi needs to be able to
ask the state of the software rfkill ignoring the state
of the hardware rfkill.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Link: https://lore.kernel.org/r/20211219195124.125689-1-emmanuel.grumbach@intel.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/rfkill.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 231e06b74b50..c35f3962dc4f 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -229,6 +229,13 @@ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw);
  */
 bool rfkill_blocked(struct rfkill *rfkill);
 
+/**
+ * rfkill_soft_blocked - Query soft rfkill block state
+ *
+ * @rfkill: rfkill struct to query
+ */
+bool rfkill_soft_blocked(struct rfkill *rfkill);
+
 /**
  * rfkill_find_type - Helper for finding rfkill type by name
  * @name: the name of the type
-- 
cgit v1.2.3


From e047d0372689f5d4231eefb731b60ac64720bbf0 Mon Sep 17 00:00:00 2001
From: Ricard Wanderlof <ricardw@axis.com>
Date: Wed, 15 Dec 2021 18:01:24 +0100
Subject: ASoC: tlv320adc3xxx: New codec bindings

DT bindings for Texas Instruments TLV320ADC3001 and TLV320ADC3101
audio ADCs.

Signed-off-by: Ricard Wanderlof <ricardw@axis.com>
Link: https://lore.kernel.org/r/alpine.DEB.2.21.2112151759170.27889@lap5cg0092dnk.se.axis.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/dt-bindings/sound/tlv320adc3xxx.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 include/dt-bindings/sound/tlv320adc3xxx.h

(limited to 'include')

diff --git a/include/dt-bindings/sound/tlv320adc3xxx.h b/include/dt-bindings/sound/tlv320adc3xxx.h
new file mode 100644
index 000000000000..ec988439da20
--- /dev/null
+++ b/include/dt-bindings/sound/tlv320adc3xxx.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Devicetree bindings definitions for tlv320adc3xxx driver.
+ *
+ * Copyright (C) 2021 Axis Communications AB
+ */
+#ifndef __DT_TLV320ADC3XXX_H
+#define __DT_TLV320ADC3XXX_H
+
+#define ADC3XXX_GPIO_DISABLED		0 /* I/O buffers powered down */
+#define ADC3XXX_GPIO_INPUT		1 /* Various non-GPIO inputs */
+#define ADC3XXX_GPIO_GPI		2 /* General purpose input */
+#define ADC3XXX_GPIO_GPO		3 /* General purpose output */
+#define ADC3XXX_GPIO_CLKOUT		4 /* Source set in reg. CLKOUT_MUX */
+#define ADC3XXX_GPIO_INT1		5 /* INT1 output */
+#define ADC3XXX_GPIO_INT2		6 /* INT2 output */
+/* value 7 is reserved */
+#define ADC3XXX_GPIO_SECONDARY_BCLK	8 /* Codec interface secondary BCLK */
+#define ADC3XXX_GPIO_SECONDARY_WCLK	9 /* Codec interface secondary WCLK */
+#define ADC3XXX_GPIO_ADC_MOD_CLK	10 /* Clock output for digital mics */
+/* values 11-15 reserved */
+
+#define ADC3XXX_MICBIAS_OFF		0 /* Micbias pin powered off */
+#define ADC3XXX_MICBIAS_2_0V		1 /* Micbias pin set to 2.0V */
+#define ADC3XXX_MICBIAS_2_5V		2 /* Micbias pin set to 2.5V */
+#define ADC3XXX_MICBIAS_AVDD		3 /* Use AVDD voltage for micbias pin */
+
+#endif /* __DT_TLV320ADC3XXX_H */
-- 
cgit v1.2.3


From aade40b62745cf0b4e8a17d43652c5faff354e6b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Mon, 20 Dec 2021 13:34:48 +0100
Subject: iommu/iova: Temporarily include dma-mapping.h from iova.h

Some users of iova.h still expect that dma-mapping.h is also included.
Re-add the include until these users are updated to fix compile
failures in the iommu tree.

Acked-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/20211220123448.19996-1-joro@8bytes.org
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iova.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/iova.h b/include/linux/iova.h
index 0abd48c5e622..cea79cb9f26c 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/rbtree.h>
+#include <linux/dma-mapping.h>
 
 /* iova structure */
 struct iova {
-- 
cgit v1.2.3


From e82513696eadcf71048be174b4b71f9903ba4afd Mon Sep 17 00:00:00 2001
From: Daniel Palmer <daniel@0x0f.com>
Date: Mon, 13 Dec 2021 18:40:33 +0900
Subject: dt-bindings: gpio: msc313: Add offsets for ssd20xd

Add the gpio offsets for the SSD201 and SSD202D chips.

Signed-off-by: Daniel Palmer <daniel@0x0f.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
---
 include/dt-bindings/gpio/msc313-gpio.h | 71 ++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/gpio/msc313-gpio.h b/include/dt-bindings/gpio/msc313-gpio.h
index 2dd56683d3c1..5458c6580a02 100644
--- a/include/dt-bindings/gpio/msc313-gpio.h
+++ b/include/dt-bindings/gpio/msc313-gpio.h
@@ -50,4 +50,75 @@
 #define MSC313_GPIO_SPI0_DI	(MSC313_GPIO_SPI0 + 2)
 #define MSC313_GPIO_SPI0_DO	(MSC313_GPIO_SPI0 + 3)
 
+/* SSD20x */
+#define SSD20XD_GPIO_FUART	0
+#define SSD20XD_GPIO_FUART_RX	(SSD20XD_GPIO_FUART + 0)
+#define SSD20XD_GPIO_FUART_TX	(SSD20XD_GPIO_FUART + 1)
+#define SSD20XD_GPIO_FUART_CTS	(SSD20XD_GPIO_FUART + 2)
+#define SSD20XD_GPIO_FUART_RTS	(SSD20XD_GPIO_FUART + 3)
+
+#define SSD20XD_GPIO_SD		(SSD20XD_GPIO_FUART_RTS + 1)
+#define SSD20XD_GPIO_SD_CLK	(SSD20XD_GPIO_SD + 0)
+#define SSD20XD_GPIO_SD_CMD	(SSD20XD_GPIO_SD + 1)
+#define SSD20XD_GPIO_SD_D0	(SSD20XD_GPIO_SD + 2)
+#define SSD20XD_GPIO_SD_D1	(SSD20XD_GPIO_SD + 3)
+#define SSD20XD_GPIO_SD_D2	(SSD20XD_GPIO_SD + 4)
+#define SSD20XD_GPIO_SD_D3	(SSD20XD_GPIO_SD + 5)
+
+#define SSD20XD_GPIO_UART0	(SSD20XD_GPIO_SD_D3 + 1)
+#define SSD20XD_GPIO_UART0_RX	(SSD20XD_GPIO_UART0 + 0)
+#define SSD20XD_GPIO_UART0_TX	(SSD20XD_GPIO_UART0 + 1)
+
+#define SSD20XD_GPIO_UART1	(SSD20XD_GPIO_UART0_TX + 1)
+#define SSD20XD_GPIO_UART1_RX	(SSD20XD_GPIO_UART1 + 0)
+#define SSD20XD_GPIO_UART1_TX	(SSD20XD_GPIO_UART1 + 1)
+
+#define SSD20XD_GPIO_TTL	(SSD20XD_GPIO_UART1_TX + 1)
+#define SSD20XD_GPIO_TTL0	(SSD20XD_GPIO_TTL + 0)
+#define SSD20XD_GPIO_TTL1	(SSD20XD_GPIO_TTL + 1)
+#define SSD20XD_GPIO_TTL2	(SSD20XD_GPIO_TTL + 2)
+#define SSD20XD_GPIO_TTL3	(SSD20XD_GPIO_TTL + 3)
+#define SSD20XD_GPIO_TTL4	(SSD20XD_GPIO_TTL + 4)
+#define SSD20XD_GPIO_TTL5	(SSD20XD_GPIO_TTL + 5)
+#define SSD20XD_GPIO_TTL6	(SSD20XD_GPIO_TTL + 6)
+#define SSD20XD_GPIO_TTL7	(SSD20XD_GPIO_TTL + 7)
+#define SSD20XD_GPIO_TTL8	(SSD20XD_GPIO_TTL + 8)
+#define SSD20XD_GPIO_TTL9	(SSD20XD_GPIO_TTL + 9)
+#define SSD20XD_GPIO_TTL10	(SSD20XD_GPIO_TTL + 10)
+#define SSD20XD_GPIO_TTL11	(SSD20XD_GPIO_TTL + 11)
+#define SSD20XD_GPIO_TTL12	(SSD20XD_GPIO_TTL + 12)
+#define SSD20XD_GPIO_TTL13	(SSD20XD_GPIO_TTL + 13)
+#define SSD20XD_GPIO_TTL14	(SSD20XD_GPIO_TTL + 14)
+#define SSD20XD_GPIO_TTL15	(SSD20XD_GPIO_TTL + 15)
+#define SSD20XD_GPIO_TTL16	(SSD20XD_GPIO_TTL + 16)
+#define SSD20XD_GPIO_TTL17	(SSD20XD_GPIO_TTL + 17)
+#define SSD20XD_GPIO_TTL18	(SSD20XD_GPIO_TTL + 18)
+#define SSD20XD_GPIO_TTL19	(SSD20XD_GPIO_TTL + 19)
+#define SSD20XD_GPIO_TTL20	(SSD20XD_GPIO_TTL + 20)
+#define SSD20XD_GPIO_TTL21	(SSD20XD_GPIO_TTL + 21)
+#define SSD20XD_GPIO_TTL22	(SSD20XD_GPIO_TTL + 22)
+#define SSD20XD_GPIO_TTL23	(SSD20XD_GPIO_TTL + 23)
+#define SSD20XD_GPIO_TTL24	(SSD20XD_GPIO_TTL + 24)
+#define SSD20XD_GPIO_TTL25	(SSD20XD_GPIO_TTL + 25)
+#define SSD20XD_GPIO_TTL26	(SSD20XD_GPIO_TTL + 26)
+#define SSD20XD_GPIO_TTL27	(SSD20XD_GPIO_TTL + 27)
+
+#define SSD20XD_GPIO_GPIO	(SSD20XD_GPIO_TTL27 + 1)
+#define SSD20XD_GPIO_GPIO0	(SSD20XD_GPIO_GPIO + 0)
+#define SSD20XD_GPIO_GPIO1	(SSD20XD_GPIO_GPIO + 1)
+#define SSD20XD_GPIO_GPIO2	(SSD20XD_GPIO_GPIO + 2)
+#define SSD20XD_GPIO_GPIO3	(SSD20XD_GPIO_GPIO + 3)
+#define SSD20XD_GPIO_GPIO4	(SSD20XD_GPIO_GPIO + 4)
+#define SSD20XD_GPIO_GPIO5	(SSD20XD_GPIO_GPIO + 5)
+#define SSD20XD_GPIO_GPIO6	(SSD20XD_GPIO_GPIO + 6)
+#define SSD20XD_GPIO_GPIO7	(SSD20XD_GPIO_GPIO + 7)
+#define SSD20XD_GPIO_GPIO10	(SSD20XD_GPIO_GPIO + 8)
+#define SSD20XD_GPIO_GPIO11	(SSD20XD_GPIO_GPIO + 9)
+#define SSD20XD_GPIO_GPIO12	(SSD20XD_GPIO_GPIO + 10)
+#define SSD20XD_GPIO_GPIO13	(SSD20XD_GPIO_GPIO + 11)
+#define SSD20XD_GPIO_GPIO14	(SSD20XD_GPIO_GPIO + 12)
+#define SSD20XD_GPIO_GPIO85	(SSD20XD_GPIO_GPIO + 13)
+#define SSD20XD_GPIO_GPIO86	(SSD20XD_GPIO_GPIO + 14)
+#define SSD20XD_GPIO_GPIO90	(SSD20XD_GPIO_GPIO + 15)
+
 #endif /* _DT_BINDINGS_MSC313_GPIO_H */
-- 
cgit v1.2.3


From 59f37b7370ef56e6faf25d0e18bc597a0af40bb8 Mon Sep 17 00:00:00 2001
From: Sam Protsenko <semen.protsenko@linaro.org>
Date: Sat, 4 Dec 2021 21:57:55 +0200
Subject: tty: serial: samsung: Remove USI initialization

USI control is now extracted to the dedicated USI driver. Remove USI
related code from serial driver to avoid conflicts and code duplication.

Signed-off-by: Sam Protsenko <semen.protsenko@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20211204195757.8600-4-semen.protsenko@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_s3c.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/serial_s3c.h b/include/linux/serial_s3c.h
index cf0de4a86640..f6c3323fc4c5 100644
--- a/include/linux/serial_s3c.h
+++ b/include/linux/serial_s3c.h
@@ -27,15 +27,6 @@
 #define S3C2410_UERSTAT	  (0x14)
 #define S3C2410_UFSTAT	  (0x18)
 #define S3C2410_UMSTAT	  (0x1C)
-#define USI_CON		  (0xC4)
-#define USI_OPTION	  (0xC8)
-
-#define USI_CON_RESET			(1<<0)
-#define USI_CON_RESET_MASK		(1<<0)
-
-#define USI_OPTION_HWACG_CLKREQ_ON	(1<<1)
-#define USI_OPTION_HWACG_CLKSTOP_ON	(1<<2)
-#define USI_OPTION_HWACG_MASK		(3<<1)
 
 #define S3C2410_LCON_CFGMASK	  ((0xF<<3)|(0x3))
 
-- 
cgit v1.2.3


From 1a5e91d8375fc8369207cc0b9894a324f2bbf1d9 Mon Sep 17 00:00:00 2001
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
Date: Mon, 13 Dec 2021 02:14:02 -0500
Subject: swiotlb: Add swiotlb bounce buffer remap function for HV IVM

In Isolation VM with AMD SEV, bounce buffer needs to be accessed via
extra address space which is above shared_gpa_boundary (E.G 39 bit
address line) reported by Hyper-V CPUID ISOLATION_CONFIG. The access
physical address will be original physical address + shared_gpa_boundary.
The shared_gpa_boundary in the AMD SEV SNP spec is called virtual top of
memory(vTOM). Memory addresses below vTOM are automatically treated as
private while memory above vTOM is treated as shared.

Expose swiotlb_unencrypted_base for platforms to set unencrypted
memory base offset and platform calls swiotlb_update_mem_attributes()
to remap swiotlb mem to unencrypted address space. memremap() can
not be called in the early stage and so put remapping code into
swiotlb_update_mem_attributes(). Store remap address and use it to copy
data from/to swiotlb bounce buffer.

Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/20211213071407.314309-2-ltykernel@gmail.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 include/linux/swiotlb.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 569272871375..f6c3638255d5 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -73,6 +73,9 @@ extern enum swiotlb_force swiotlb_force;
  * @end:	The end address of the swiotlb memory pool. Used to do a quick
  *		range check to see if the memory was in fact allocated by this
  *		API.
+ * @vaddr:	The vaddr of the swiotlb memory pool. The swiotlb memory pool
+ *		may be remapped in the memory encrypted case and store virtual
+ *		address for bounce buffer operation.
  * @nslabs:	The number of IO TLB blocks (in groups of 64) between @start and
  *		@end. For default swiotlb, this is command line adjustable via
  *		setup_io_tlb_npages.
@@ -92,6 +95,7 @@ extern enum swiotlb_force swiotlb_force;
 struct io_tlb_mem {
 	phys_addr_t start;
 	phys_addr_t end;
+	void *vaddr;
 	unsigned long nslabs;
 	unsigned long used;
 	unsigned int index;
@@ -186,4 +190,6 @@ static inline bool is_swiotlb_for_alloc(struct device *dev)
 }
 #endif /* CONFIG_DMA_RESTRICTED_POOL */
 
+extern phys_addr_t swiotlb_unencrypted_base;
+
 #endif /* __LINUX_SWIOTLB_H */
-- 
cgit v1.2.3


From 743b237c3a7b0f5b44aa704aae8a1058877b6322 Mon Sep 17 00:00:00 2001
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
Date: Mon, 13 Dec 2021 02:14:05 -0500
Subject: scsi: storvsc: Add Isolation VM support for storvsc driver

In Isolation VM, all shared memory with host needs to mark visible
to host via hvcall. vmbus_establish_gpadl() has already done it for
storvsc rx/tx ring buffer. The page buffer used by vmbus_sendpacket_
mpb_desc() still needs to be handled. Use DMA API(scsi_dma_map/unmap)
to map these memory during sending/receiving packet and return swiotlb
bounce buffer dma address. In Isolation VM, swiotlb  bounce buffer is
marked to be visible to host and the swiotlb force mode is enabled.

Set device's dma min align mask to HV_HYP_PAGE_SIZE - 1 in order to
keep the original data offset in the bounce buffer.

Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
Reviewed-by: Long Li <longli@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/20211213071407.314309-5-ltykernel@gmail.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 include/linux/hyperv.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index b823311eac79..650a0574b746 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1261,6 +1261,7 @@ struct hv_device {
 
 	struct vmbus_channel *channel;
 	struct kset	     *channels_kset;
+	struct device_dma_parameters dma_parms;
 
 	/* place holder to keep track of the dir for hv device in debugfs */
 	struct dentry *debug_dir;
-- 
cgit v1.2.3


From 846da38de0e8224f2f94b885125cf1fd2d7b0d39 Mon Sep 17 00:00:00 2001
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
Date: Mon, 13 Dec 2021 02:14:06 -0500
Subject: net: netvsc: Add Isolation VM support for netvsc driver

In Isolation VM, all shared memory with host needs to mark visible
to host via hvcall. vmbus_establish_gpadl() has already done it for
netvsc rx/tx ring buffer. The page buffer used by vmbus_sendpacket_
pagebuffer() stills need to be handled. Use DMA API to map/umap
these memory during sending/receiving packet and Hyper-V swiotlb
bounce buffer dma address will be returned. The swiotlb bounce buffer
has been masked to be visible to host during boot up.

rx/tx ring buffer is allocated via vzalloc() and they need to be
mapped into unencrypted address space(above vTOM) before sharing
with host and accessing. Add hv_map/unmap_memory() to map/umap rx
/tx ring buffer.

Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/20211213071407.314309-6-ltykernel@gmail.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 include/asm-generic/mshyperv.h | 2 ++
 include/linux/hyperv.h         | 5 +++++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 3e2248ac328e..94e73ba129c5 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -269,6 +269,8 @@ bool hv_isolation_type_snp(void);
 u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size);
 void hyperv_cleanup(void);
 bool hv_query_ext_cap(u64 cap_query);
+void *hv_map_memory(void *addr, unsigned long size);
+void hv_unmap_memory(void *addr);
 #else /* CONFIG_HYPERV */
 static inline bool hv_is_hyperv_initialized(void) { return false; }
 static inline bool hv_is_hibernation_supported(void) { return false; }
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 650a0574b746..f565a8938836 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1584,6 +1584,11 @@ struct hyperv_service_callback {
 	void (*callback)(void *context);
 };
 
+struct hv_dma_range {
+	dma_addr_t dma;
+	u32 mapping_size;
+};
+
 #define MAX_SRV_VER	0x7ffffff
 extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, u32 buflen,
 				const int *fw_version, int fw_vercnt,
-- 
cgit v1.2.3


From ed98ea2128b6fd83bce13716edf8f5fe6c47f574 Mon Sep 17 00:00:00 2001
From: Xiu Jianfeng <xiujianfeng@huawei.com>
Date: Fri, 17 Dec 2021 09:01:52 +0800
Subject: audit: replace zero-length array with flexible-array member

Zero-length arrays are deprecated and should be replaced with
flexible-array members.

Link: https://github.com/KSPP/linux/issues/78
Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/uapi/linux/audit.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 9176a095fefc..8eda133ca4c1 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -514,7 +514,7 @@ struct audit_rule_data {
 	__u32		values[AUDIT_MAX_FIELDS];
 	__u32		fieldflags[AUDIT_MAX_FIELDS];
 	__u32		buflen;	/* total length of string fields */
-	char		buf[0];	/* string fields buffer */
+	char		buf[];	/* string fields buffer */
 };
 
 #endif /* _UAPI_LINUX_AUDIT_H_ */
-- 
cgit v1.2.3


From 6fc61c39ee1adb5f4115d288c876772fcd8b6979 Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@somainline.org>
Date: Sun, 21 Nov 2021 01:20:46 +0100
Subject: soc: qcom: llcc: Add configuration data for SM8350

Add LLCC configuration data for SM8350 SoC.

Signed-off-by: Konrad Dybcio <konrad.dybcio@somainline.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20211121002050.36977-2-konrad.dybcio@somainline.org
---
 include/linux/soc/qcom/llcc-qcom.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
index 437c9df13229..9e8fd92c96b7 100644
--- a/include/linux/soc/qcom/llcc-qcom.h
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -33,6 +33,9 @@
 #define LLCC_MODPE       29
 #define LLCC_APTCM       30
 #define LLCC_WRCACHE     31
+#define LLCC_CVPFW       32
+#define LLCC_CPUSS1      33
+#define LLCC_CPUHWT      36
 
 /**
  * struct llcc_slice_desc - Cache slice descriptor
-- 
cgit v1.2.3


From 8712107740ad272bd89c873ec850146ac3d8832a Mon Sep 17 00:00:00 2001
From: Martin Botka <martin.botka@somainline.org>
Date: Tue, 30 Nov 2021 22:23:29 +0100
Subject: dt-bindings: qcom-rpmpd: Add sm6125 power domains

Add dt-bindings for sm6125 SoC RPM Power Domains

Signed-off-by: Martin Botka <martin.botka@somainline.org>
[bjorn: Added compatible to binding as well]
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20211130212332.25401-1-martin.botka@somainline.org
---
 include/dt-bindings/power/qcom-rpmpd.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/power/qcom-rpmpd.h b/include/dt-bindings/power/qcom-rpmpd.h
index 340b0ffe5eb8..5b64059ef43e 100644
--- a/include/dt-bindings/power/qcom-rpmpd.h
+++ b/include/dt-bindings/power/qcom-rpmpd.h
@@ -219,6 +219,14 @@
 #define SM6115_VDD_LPI_CX	6
 #define SM6115_VDD_LPI_MX	7
 
+/* SM6125 Power Domains */
+#define SM6125_VDDCX		0
+#define SM6125_VDDCX_AO		1
+#define SM6125_VDDCX_VFL	2
+#define SM6125_VDDMX		3
+#define SM6125_VDDMX_AO		4
+#define SM6125_VDDMX_VFL	5
+
 /* QCM2290 Power Domains */
 #define QCM2290_VDDCX		0
 #define QCM2290_VDDCX_AO	1
-- 
cgit v1.2.3


From 22c755708c2395c58eb7670357baa110ba1ca2e0 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Wed, 1 Dec 2021 12:57:43 +0530
Subject: dt-bindings: power: rpmpd: Add SM8450 to rpmpd binding

Add compatible and constants for the power domains exposed by the RPMH
in the Qualcomm SM8450 platform.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20211201072745.3969077-6-vkoul@kernel.org
---
 include/dt-bindings/power/qcom-rpmpd.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/power/qcom-rpmpd.h b/include/dt-bindings/power/qcom-rpmpd.h
index 5b64059ef43e..edfc1ff2acb3 100644
--- a/include/dt-bindings/power/qcom-rpmpd.h
+++ b/include/dt-bindings/power/qcom-rpmpd.h
@@ -68,6 +68,21 @@
 #define SM8350_MXC_AO	11
 #define SM8350_MSS	12
 
+/* SM8450 Power Domain Indexes */
+#define SM8450_CX	0
+#define SM8450_CX_AO	1
+#define SM8450_EBI	2
+#define SM8450_GFX	3
+#define SM8450_LCX	4
+#define SM8450_LMX	5
+#define SM8450_MMCX	6
+#define SM8450_MMCX_AO	7
+#define SM8450_MX	8
+#define SM8450_MX_AO	9
+#define SM8450_MXC	10
+#define SM8450_MXC_AO	11
+#define SM8450_MSS	12
+
 /* SC7180 Power Domain Indexes */
 #define SC7180_CX	0
 #define SC7180_CX_AO	1
-- 
cgit v1.2.3


From dbcefdeb2a58039f4c81d0361056fbdd9be906a1 Mon Sep 17 00:00:00 2001
From: Matt Johnston <matt@codeconstruct.com.au>
Date: Mon, 20 Dec 2021 10:31:04 +0800
Subject: mctp: emit RTM_NEWADDR and RTM_DELADDR

Userspace can receive notification of MCTP address changes via
RTNLGRP_MCTP_IFADDR rtnetlink multicast group.

Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
Link: https://lore.kernel.org/r/20211220023104.1965509-1-matt@codeconstruct.com.au
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/rtnetlink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 5888492a5257..93d934cc4613 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -754,6 +754,8 @@ enum rtnetlink_groups {
 #define RTNLGRP_NEXTHOP		RTNLGRP_NEXTHOP
 	RTNLGRP_BRVLAN,
 #define RTNLGRP_BRVLAN		RTNLGRP_BRVLAN
+	RTNLGRP_MCTP_IFADDR,
+#define RTNLGRP_MCTP_IFADDR	RTNLGRP_MCTP_IFADDR
 	__RTNLGRP_MAX
 };
 #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
-- 
cgit v1.2.3


From 8f905c0e7354ef261360fb7535ea079b1082c105 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 20 Dec 2021 06:33:30 -0800
Subject: inet: fully convert sk->sk_rx_dst to RCU rules

syzbot reported various issues around early demux,
one being included in this changelog [1]

sk->sk_rx_dst is using RCU protection without clearly
documenting it.

And following sequences in tcp_v4_do_rcv()/tcp_v6_do_rcv()
are not following standard RCU rules.

[a]    dst_release(dst);
[b]    sk->sk_rx_dst = NULL;

They look wrong because a delete operation of RCU protected
pointer is supposed to clear the pointer before
the call_rcu()/synchronize_rcu() guarding actual memory freeing.

In some cases indeed, dst could be freed before [b] is done.

We could cheat by clearing sk_rx_dst before calling
dst_release(), but this seems the right time to stick
to standard RCU annotations and debugging facilities.

[1]
BUG: KASAN: use-after-free in dst_check include/net/dst.h:470 [inline]
BUG: KASAN: use-after-free in tcp_v4_early_demux+0x95b/0x960 net/ipv4/tcp_ipv4.c:1792
Read of size 2 at addr ffff88807f1cb73a by task syz-executor.5/9204

CPU: 0 PID: 9204 Comm: syz-executor.5 Not tainted 5.16.0-rc5-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
 print_address_description.constprop.0.cold+0x8d/0x320 mm/kasan/report.c:247
 __kasan_report mm/kasan/report.c:433 [inline]
 kasan_report.cold+0x83/0xdf mm/kasan/report.c:450
 dst_check include/net/dst.h:470 [inline]
 tcp_v4_early_demux+0x95b/0x960 net/ipv4/tcp_ipv4.c:1792
 ip_rcv_finish_core.constprop.0+0x15de/0x1e80 net/ipv4/ip_input.c:340
 ip_list_rcv_finish.constprop.0+0x1b2/0x6e0 net/ipv4/ip_input.c:583
 ip_sublist_rcv net/ipv4/ip_input.c:609 [inline]
 ip_list_rcv+0x34e/0x490 net/ipv4/ip_input.c:644
 __netif_receive_skb_list_ptype net/core/dev.c:5508 [inline]
 __netif_receive_skb_list_core+0x549/0x8e0 net/core/dev.c:5556
 __netif_receive_skb_list net/core/dev.c:5608 [inline]
 netif_receive_skb_list_internal+0x75e/0xd80 net/core/dev.c:5699
 gro_normal_list net/core/dev.c:5853 [inline]
 gro_normal_list net/core/dev.c:5849 [inline]
 napi_complete_done+0x1f1/0x880 net/core/dev.c:6590
 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline]
 virtnet_poll+0xca2/0x11b0 drivers/net/virtio_net.c:1557
 __napi_poll+0xaf/0x440 net/core/dev.c:7023
 napi_poll net/core/dev.c:7090 [inline]
 net_rx_action+0x801/0xb40 net/core/dev.c:7177
 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558
 invoke_softirq kernel/softirq.c:432 [inline]
 __irq_exit_rcu+0x123/0x180 kernel/softirq.c:637
 irq_exit_rcu+0x5/0x20 kernel/softirq.c:649
 common_interrupt+0x52/0xc0 arch/x86/kernel/irq.c:240
 asm_common_interrupt+0x1e/0x40 arch/x86/include/asm/idtentry.h:629
RIP: 0033:0x7f5e972bfd57
Code: 39 d1 73 14 0f 1f 80 00 00 00 00 48 8b 50 f8 48 83 e8 08 48 39 ca 77 f3 48 39 c3 73 3e 48 89 13 48 8b 50 f8 48 89 38 49 8b 0e <48> 8b 3e 48 83 c3 08 48 83 c6 08 eb bc 48 39 d1 72 9e 48 39 d0 73
RSP: 002b:00007fff8a413210 EFLAGS: 00000283
RAX: 00007f5e97108990 RBX: 00007f5e97108338 RCX: ffffffff81d3aa45
RDX: ffffffff81d3aa45 RSI: 00007f5e97108340 RDI: ffffffff81d3aa45
RBP: 00007f5e97107eb8 R08: 00007f5e97108d88 R09: 0000000093c2e8d9
R10: 0000000000000000 R11: 0000000000000000 R12: 00007f5e97107eb0
R13: 00007f5e97108338 R14: 00007f5e97107ea8 R15: 0000000000000019
 </TASK>

Allocated by task 13:
 kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38
 kasan_set_track mm/kasan/common.c:46 [inline]
 set_alloc_info mm/kasan/common.c:434 [inline]
 __kasan_slab_alloc+0x90/0xc0 mm/kasan/common.c:467
 kasan_slab_alloc include/linux/kasan.h:259 [inline]
 slab_post_alloc_hook mm/slab.h:519 [inline]
 slab_alloc_node mm/slub.c:3234 [inline]
 slab_alloc mm/slub.c:3242 [inline]
 kmem_cache_alloc+0x202/0x3a0 mm/slub.c:3247
 dst_alloc+0x146/0x1f0 net/core/dst.c:92
 rt_dst_alloc+0x73/0x430 net/ipv4/route.c:1613
 ip_route_input_slow+0x1817/0x3a20 net/ipv4/route.c:2340
 ip_route_input_rcu net/ipv4/route.c:2470 [inline]
 ip_route_input_noref+0x116/0x2a0 net/ipv4/route.c:2415
 ip_rcv_finish_core.constprop.0+0x288/0x1e80 net/ipv4/ip_input.c:354
 ip_list_rcv_finish.constprop.0+0x1b2/0x6e0 net/ipv4/ip_input.c:583
 ip_sublist_rcv net/ipv4/ip_input.c:609 [inline]
 ip_list_rcv+0x34e/0x490 net/ipv4/ip_input.c:644
 __netif_receive_skb_list_ptype net/core/dev.c:5508 [inline]
 __netif_receive_skb_list_core+0x549/0x8e0 net/core/dev.c:5556
 __netif_receive_skb_list net/core/dev.c:5608 [inline]
 netif_receive_skb_list_internal+0x75e/0xd80 net/core/dev.c:5699
 gro_normal_list net/core/dev.c:5853 [inline]
 gro_normal_list net/core/dev.c:5849 [inline]
 napi_complete_done+0x1f1/0x880 net/core/dev.c:6590
 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline]
 virtnet_poll+0xca2/0x11b0 drivers/net/virtio_net.c:1557
 __napi_poll+0xaf/0x440 net/core/dev.c:7023
 napi_poll net/core/dev.c:7090 [inline]
 net_rx_action+0x801/0xb40 net/core/dev.c:7177
 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558

Freed by task 13:
 kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38
 kasan_set_track+0x21/0x30 mm/kasan/common.c:46
 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370
 ____kasan_slab_free mm/kasan/common.c:366 [inline]
 ____kasan_slab_free mm/kasan/common.c:328 [inline]
 __kasan_slab_free+0xff/0x130 mm/kasan/common.c:374
 kasan_slab_free include/linux/kasan.h:235 [inline]
 slab_free_hook mm/slub.c:1723 [inline]
 slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1749
 slab_free mm/slub.c:3513 [inline]
 kmem_cache_free+0xbd/0x5d0 mm/slub.c:3530
 dst_destroy+0x2d6/0x3f0 net/core/dst.c:127
 rcu_do_batch kernel/rcu/tree.c:2506 [inline]
 rcu_core+0x7ab/0x1470 kernel/rcu/tree.c:2741
 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558

Last potentially related work creation:
 kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38
 __kasan_record_aux_stack+0xf5/0x120 mm/kasan/generic.c:348
 __call_rcu kernel/rcu/tree.c:2985 [inline]
 call_rcu+0xb1/0x740 kernel/rcu/tree.c:3065
 dst_release net/core/dst.c:177 [inline]
 dst_release+0x79/0xe0 net/core/dst.c:167
 tcp_v4_do_rcv+0x612/0x8d0 net/ipv4/tcp_ipv4.c:1712
 sk_backlog_rcv include/net/sock.h:1030 [inline]
 __release_sock+0x134/0x3b0 net/core/sock.c:2768
 release_sock+0x54/0x1b0 net/core/sock.c:3300
 tcp_sendmsg+0x36/0x40 net/ipv4/tcp.c:1441
 inet_sendmsg+0x99/0xe0 net/ipv4/af_inet.c:819
 sock_sendmsg_nosec net/socket.c:704 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:724
 sock_write_iter+0x289/0x3c0 net/socket.c:1057
 call_write_iter include/linux/fs.h:2162 [inline]
 new_sync_write+0x429/0x660 fs/read_write.c:503
 vfs_write+0x7cd/0xae0 fs/read_write.c:590
 ksys_write+0x1ee/0x250 fs/read_write.c:643
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

The buggy address belongs to the object at ffff88807f1cb700
 which belongs to the cache ip_dst_cache of size 176
The buggy address is located 58 bytes inside of
 176-byte region [ffff88807f1cb700, ffff88807f1cb7b0)
The buggy address belongs to the page:
page:ffffea0001fc72c0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x7f1cb
flags: 0xfff00000000200(slab|node=0|zone=1|lastcpupid=0x7ff)
raw: 00fff00000000200 dead000000000100 dead000000000122 ffff8881413bb780
raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected
page_owner tracks the page as allocated
page last allocated via order 0, migratetype Unmovable, gfp_mask 0x112a20(GFP_ATOMIC|__GFP_NOWARN|__GFP_NORETRY|__GFP_HARDWALL), pid 5, ts 108466983062, free_ts 108048976062
 prep_new_page mm/page_alloc.c:2418 [inline]
 get_page_from_freelist+0xa72/0x2f50 mm/page_alloc.c:4149
 __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5369
 alloc_pages+0x1a7/0x300 mm/mempolicy.c:2191
 alloc_slab_page mm/slub.c:1793 [inline]
 allocate_slab mm/slub.c:1930 [inline]
 new_slab+0x32d/0x4a0 mm/slub.c:1993
 ___slab_alloc+0x918/0xfe0 mm/slub.c:3022
 __slab_alloc.constprop.0+0x4d/0xa0 mm/slub.c:3109
 slab_alloc_node mm/slub.c:3200 [inline]
 slab_alloc mm/slub.c:3242 [inline]
 kmem_cache_alloc+0x35c/0x3a0 mm/slub.c:3247
 dst_alloc+0x146/0x1f0 net/core/dst.c:92
 rt_dst_alloc+0x73/0x430 net/ipv4/route.c:1613
 __mkroute_output net/ipv4/route.c:2564 [inline]
 ip_route_output_key_hash_rcu+0x921/0x2d00 net/ipv4/route.c:2791
 ip_route_output_key_hash+0x18b/0x300 net/ipv4/route.c:2619
 __ip_route_output_key include/net/route.h:126 [inline]
 ip_route_output_flow+0x23/0x150 net/ipv4/route.c:2850
 ip_route_output_key include/net/route.h:142 [inline]
 geneve_get_v4_rt+0x3a6/0x830 drivers/net/geneve.c:809
 geneve_xmit_skb drivers/net/geneve.c:899 [inline]
 geneve_xmit+0xc4a/0x3540 drivers/net/geneve.c:1082
 __netdev_start_xmit include/linux/netdevice.h:4994 [inline]
 netdev_start_xmit include/linux/netdevice.h:5008 [inline]
 xmit_one net/core/dev.c:3590 [inline]
 dev_hard_start_xmit+0x1eb/0x920 net/core/dev.c:3606
 __dev_queue_xmit+0x299a/0x3650 net/core/dev.c:4229
page last free stack trace:
 reset_page_owner include/linux/page_owner.h:24 [inline]
 free_pages_prepare mm/page_alloc.c:1338 [inline]
 free_pcp_prepare+0x374/0x870 mm/page_alloc.c:1389
 free_unref_page_prepare mm/page_alloc.c:3309 [inline]
 free_unref_page+0x19/0x690 mm/page_alloc.c:3388
 qlink_free mm/kasan/quarantine.c:146 [inline]
 qlist_free_all+0x5a/0xc0 mm/kasan/quarantine.c:165
 kasan_quarantine_reduce+0x180/0x200 mm/kasan/quarantine.c:272
 __kasan_slab_alloc+0xa2/0xc0 mm/kasan/common.c:444
 kasan_slab_alloc include/linux/kasan.h:259 [inline]
 slab_post_alloc_hook mm/slab.h:519 [inline]
 slab_alloc_node mm/slub.c:3234 [inline]
 kmem_cache_alloc_node+0x255/0x3f0 mm/slub.c:3270
 __alloc_skb+0x215/0x340 net/core/skbuff.c:414
 alloc_skb include/linux/skbuff.h:1126 [inline]
 alloc_skb_with_frags+0x93/0x620 net/core/skbuff.c:6078
 sock_alloc_send_pskb+0x783/0x910 net/core/sock.c:2575
 mld_newpack+0x1df/0x770 net/ipv6/mcast.c:1754
 add_grhead+0x265/0x330 net/ipv6/mcast.c:1857
 add_grec+0x1053/0x14e0 net/ipv6/mcast.c:1995
 mld_send_initial_cr.part.0+0xf6/0x230 net/ipv6/mcast.c:2242
 mld_send_initial_cr net/ipv6/mcast.c:1232 [inline]
 mld_dad_work+0x1d3/0x690 net/ipv6/mcast.c:2268
 process_one_work+0x9b2/0x1690 kernel/workqueue.c:2298
 worker_thread+0x658/0x11f0 kernel/workqueue.c:2445

Memory state around the buggy address:
 ffff88807f1cb600: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 ffff88807f1cb680: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc
>ffff88807f1cb700: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                                        ^
 ffff88807f1cb780: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc
 ffff88807f1cb800: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb

Fixes: 41063e9dd119 ("ipv4: Early TCP socket demux.")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20211220143330.680945-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index bea21ff70e74..d47e9658da28 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -431,7 +431,7 @@ struct sock {
 #ifdef CONFIG_XFRM
 	struct xfrm_policy __rcu *sk_policy[2];
 #endif
-	struct dst_entry	*sk_rx_dst;
+	struct dst_entry __rcu	*sk_rx_dst;
 	int			sk_rx_dst_ifindex;
 	u32			sk_rx_dst_cookie;
 
-- 
cgit v1.2.3


From 7e5cced9ca84df52d874aca6b632f930b3dc5bc6 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 20 Dec 2021 09:49:01 -0500
Subject: net: accept UFOv6 packages in virtio_net_hdr_to_skb

Skb with skb->protocol 0 at the time of virtio_net_hdr_to_skb may have
a protocol inferred from virtio_net_hdr with virtio_net_hdr_set_proto.

Unlike TCP, UDP does not have separate types for IPv4 and IPv6. Type
VIRTIO_NET_HDR_GSO_UDP is guessed to be IPv4/UDP. As of the below
commit, UFOv6 packets are dropped due to not matching the protocol as
obtained from dev_parse_header_protocol.

Invert the test to take that L2 protocol field as starting point and
pass both UFOv4 and UFOv6 for VIRTIO_NET_HDR_GSO_UDP.

Fixes: 924a9bc362a5 ("net: check if protocol extracted by virtio_net_hdr_set_proto is correct")
Link: https://lore.kernel.org/netdev/CABcq3pG9GRCYqFDBAJ48H1vpnnX=41u+MhQnayF1ztLH4WX0Fw@mail.gmail.com/
Reported-by: Andrew Melnichenko <andrew@daynix.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/r/20211220144901.2784030-1-willemdebruijn.kernel@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/virtio_net.h | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 04e87f4b9417..22dd48c82560 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -7,6 +7,21 @@
 #include <uapi/linux/udp.h>
 #include <uapi/linux/virtio_net.h>
 
+static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type)
+{
+	switch (gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+	case VIRTIO_NET_HDR_GSO_TCPV4:
+		return protocol == cpu_to_be16(ETH_P_IP);
+	case VIRTIO_NET_HDR_GSO_TCPV6:
+		return protocol == cpu_to_be16(ETH_P_IPV6);
+	case VIRTIO_NET_HDR_GSO_UDP:
+		return protocol == cpu_to_be16(ETH_P_IP) ||
+		       protocol == cpu_to_be16(ETH_P_IPV6);
+	default:
+		return false;
+	}
+}
+
 static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,
 					   const struct virtio_net_hdr *hdr)
 {
@@ -88,9 +103,12 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 			if (!skb->protocol) {
 				__be16 protocol = dev_parse_header_protocol(skb);
 
-				virtio_net_hdr_set_proto(skb, hdr);
-				if (protocol && protocol != skb->protocol)
+				if (!protocol)
+					virtio_net_hdr_set_proto(skb, hdr);
+				else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type))
 					return -EINVAL;
+				else
+					skb->protocol = protocol;
 			}
 retry:
 			if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
-- 
cgit v1.2.3


From 1ed1d592113959f00cc552c3b9f47ca2d157768f Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 20 Dec 2021 09:50:27 -0500
Subject: net: skip virtio_net_hdr_set_proto if protocol already set

virtio_net_hdr_set_proto infers skb->protocol from the virtio_net_hdr
gso_type, to avoid packets getting dropped for lack of a proto type.

Its protocol choice is a guess, especially in the case of UFO, where
the single VIRTIO_NET_HDR_GSO_UDP label covers both UFOv4 and UFOv6.

Skip this best effort if the field is already initialized. Whether
explicitly from userspace, or implicitly based on an earlier call to
dev_parse_header_protocol (which is more robust, but was introduced
after this patch).

Fixes: 9d2f67e43b73 ("net/packet: fix packet drop as of virtio gso")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/r/20211220145027.2784293-1-willemdebruijn.kernel@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/virtio_net.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 22dd48c82560..a960de68ac69 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -25,6 +25,9 @@ static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type)
 static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,
 					   const struct virtio_net_hdr *hdr)
 {
+	if (skb->protocol)
+		return 0;
+
 	switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
 	case VIRTIO_NET_HDR_GSO_TCPV4:
 	case VIRTIO_NET_HDR_GSO_UDP:
-- 
cgit v1.2.3


From 80a5ca99c5c04be6777df225ab932142a9d60c3f Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Thu, 16 Dec 2021 11:33:00 +0800
Subject: rapidio: remove not used macro definition in rio_ids.h

The definition of RIO_VID_FREESCALE, RIO_DID_MPC8560, RIO_DID_TSI500,
RIO_DID_TSI576 and RIO_DID_TSI721 are not used for many years in the
current code, so just remove them.

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Link: https://lore.kernel.org/r/1639625581-22867-2-git-send-email-yangtiezhu@loongson.cn
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/rio_ids.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/rio_ids.h b/include/linux/rio_ids.h
index 4846f72759b2..e74d8840708a 100644
--- a/include/linux/rio_ids.h
+++ b/include/linux/rio_ids.h
@@ -9,15 +9,10 @@
 #ifndef LINUX_RIO_IDS_H
 #define LINUX_RIO_IDS_H
 
-#define RIO_VID_FREESCALE		0x0002
-#define RIO_DID_MPC8560			0x0003
-
 #define RIO_VID_TUNDRA			0x000d
-#define RIO_DID_TSI500			0x0500
 #define RIO_DID_TSI568			0x0568
 #define RIO_DID_TSI572			0x0572
 #define RIO_DID_TSI574			0x0574
-#define RIO_DID_TSI576			0x0578 /* Same ID as Tsi578 */
 #define RIO_DID_TSI577			0x0577
 #define RIO_DID_TSI578			0x0578
 
@@ -33,7 +28,6 @@
 #define RIO_DID_IDTCPS1616		0x0379
 #define RIO_DID_IDTVPS1616		0x0377
 #define RIO_DID_IDTSPS1616		0x0378
-#define RIO_DID_TSI721			0x80ab
 #define RIO_DID_IDTRXS1632		0x80e5
 #define RIO_DID_IDTRXS2448		0x80e6
 
-- 
cgit v1.2.3


From 612d4904191ff9aca01b1e087d8687b3a223cb33 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Thu, 16 Dec 2021 11:33:01 +0800
Subject: rapidio: remove not used code about RIO_VID_TUNDRA

According to https://rapidio.org/vendor-id/, there is no 0x000d vendor id
in the complete and current list of VendorIDs, it means that the related
code is dead code now, so just remove them.

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Link: https://lore.kernel.org/r/1639625581-22867-3-git-send-email-yangtiezhu@loongson.cn
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/rio_ids.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/rio_ids.h b/include/linux/rio_ids.h
index e74d8840708a..c7e2f21dd5c1 100644
--- a/include/linux/rio_ids.h
+++ b/include/linux/rio_ids.h
@@ -9,13 +9,6 @@
 #ifndef LINUX_RIO_IDS_H
 #define LINUX_RIO_IDS_H
 
-#define RIO_VID_TUNDRA			0x000d
-#define RIO_DID_TSI568			0x0568
-#define RIO_DID_TSI572			0x0572
-#define RIO_DID_TSI574			0x0574
-#define RIO_DID_TSI577			0x0577
-#define RIO_DID_TSI578			0x0578
-
 #define RIO_VID_IDT			0x0038
 #define RIO_DID_IDT70K200		0x0310
 #define RIO_DID_IDTCPS8			0x035c
-- 
cgit v1.2.3


From 0032ca576a79946492194ae4860b462d32815c66 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Tue, 21 Dec 2021 17:16:46 +0900
Subject: counter: Add the necessary colons and indents to the comments of
 counter_compi

Since commit aaec1a0f76ec ("counter: Internalize sysfs interface code")
introduce a warning as:

linux-next/Documentation/driver-api/generic-counter:234: ./include/linux/counter.h:43: WARNING: Unexpected indentation.
linux-next/Documentation/driver-api/generic-counter:234: ./include/linux/counter.h:45: WARNING: Block quote ends without a blank line; unexpected unindent.

Add the necessary colons and indents.

Fixes: aaec1a0f76ec ("counter: Internalize sysfs interface code")
Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Link: https://lore.kernel.org/r/26011e814d6eca02c7ebdbb92f171a49928a7e89.1640072891.git.vilhelm.gray@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/counter.h | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/counter.h b/include/linux/counter.h
index b7d0a00a61cf..dfbde2808998 100644
--- a/include/linux/counter.h
+++ b/include/linux/counter.h
@@ -38,64 +38,64 @@ enum counter_comp_type {
  * @type:		Counter component data type
  * @name:		device-specific component name
  * @priv:		component-relevant data
- * @action_read		Synapse action mode read callback. The read value of the
+ * @action_read:		Synapse action mode read callback. The read value of the
  *			respective Synapse action mode should be passed back via
  *			the action parameter.
- * @device_u8_read	Device u8 component read callback. The read value of the
+ * @device_u8_read:		Device u8 component read callback. The read value of the
  *			respective Device u8 component should be passed back via
  *			the val parameter.
- * @count_u8_read	Count u8 component read callback. The read value of the
+ * @count_u8_read:		Count u8 component read callback. The read value of the
  *			respective Count u8 component should be passed back via
  *			the val parameter.
- * @signal_u8_read	Signal u8 component read callback. The read value of the
+ * @signal_u8_read:		Signal u8 component read callback. The read value of the
  *			respective Signal u8 component should be passed back via
  *			the val parameter.
- * @device_u32_read	Device u32 component read callback. The read value of
+ * @device_u32_read:		Device u32 component read callback. The read value of
  *			the respective Device u32 component should be passed
  *			back via the val parameter.
- * @count_u32_read	Count u32 component read callback. The read value of the
+ * @count_u32_read:		Count u32 component read callback. The read value of the
  *			respective Count u32 component should be passed back via
  *			the val parameter.
- * @signal_u32_read	Signal u32 component read callback. The read value of
+ * @signal_u32_read:		Signal u32 component read callback. The read value of
  *			the respective Signal u32 component should be passed
  *			back via the val parameter.
- * @device_u64_read	Device u64 component read callback. The read value of
+ * @device_u64_read:		Device u64 component read callback. The read value of
  *			the respective Device u64 component should be passed
  *			back via the val parameter.
- * @count_u64_read	Count u64 component read callback. The read value of the
+ * @count_u64_read:		Count u64 component read callback. The read value of the
  *			respective Count u64 component should be passed back via
  *			the val parameter.
- * @signal_u64_read	Signal u64 component read callback. The read value of
+ * @signal_u64_read:		Signal u64 component read callback. The read value of
  *			the respective Signal u64 component should be passed
  *			back via the val parameter.
- * @action_write	Synapse action mode write callback. The write value of
+ * @action_write:		Synapse action mode write callback. The write value of
  *			the respective Synapse action mode is passed via the
  *			action parameter.
- * @device_u8_write	Device u8 component write callback. The write value of
+ * @device_u8_write:		Device u8 component write callback. The write value of
  *			the respective Device u8 component is passed via the val
  *			parameter.
- * @count_u8_write	Count u8 component write callback. The write value of
+ * @count_u8_write:		Count u8 component write callback. The write value of
  *			the respective Count u8 component is passed via the val
  *			parameter.
- * @signal_u8_write	Signal u8 component write callback. The write value of
+ * @signal_u8_write:		Signal u8 component write callback. The write value of
  *			the respective Signal u8 component is passed via the val
  *			parameter.
- * @device_u32_write	Device u32 component write callback. The write value of
+ * @device_u32_write:		Device u32 component write callback. The write value of
  *			the respective Device u32 component is passed via the
  *			val parameter.
- * @count_u32_write	Count u32 component write callback. The write value of
+ * @count_u32_write:		Count u32 component write callback. The write value of
  *			the respective Count u32 component is passed via the val
  *			parameter.
- * @signal_u32_write	Signal u32 component write callback. The write value of
+ * @signal_u32_write:		Signal u32 component write callback. The write value of
  *			the respective Signal u32 component is passed via the
  *			val parameter.
- * @device_u64_write	Device u64 component write callback. The write value of
+ * @device_u64_write:		Device u64 component write callback. The write value of
  *			the respective Device u64 component is passed via the
  *			val parameter.
- * @count_u64_write	Count u64 component write callback. The write value of
+ * @count_u64_write:		Count u64 component write callback. The write value of
  *			the respective Count u64 component is passed via the val
  *			parameter.
- * @signal_u64_write	Signal u64 component write callback. The write value of
+ * @signal_u64_write:		Signal u64 component write callback. The write value of
  *			the respective Signal u64 component is passed via the
  *			val parameter.
  */
-- 
cgit v1.2.3


From 79f1c7304295bbbc611bc53cfd5425b777b3e840 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 9 Dec 2021 14:30:08 +0200
Subject: kernfs: Replace kernel.h with the necessary inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20211209123008.3391-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kernfs.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 9f650986a81b..861c4f0f8a29 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -6,7 +6,6 @@
 #ifndef __LINUX_KERNFS_H
 #define __LINUX_KERNFS_H
 
-#include <linux/kernel.h>
 #include <linux/err.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
@@ -14,6 +13,8 @@
 #include <linux/lockdep.h>
 #include <linux/rbtree.h>
 #include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/types.h>
 #include <linux/uidgid.h>
 #include <linux/wait.h>
 #include <linux/rwsem.h>
@@ -23,6 +24,7 @@ struct dentry;
 struct iattr;
 struct seq_file;
 struct vm_area_struct;
+struct vm_operations_struct;
 struct super_block;
 struct file_system_type;
 struct poll_table_struct;
-- 
cgit v1.2.3


From eca6e2d4a4a4b824f055eeaaa24f1c2327fb91a2 Mon Sep 17 00:00:00 2001
From: Anand Ashok Dumbre <anand.ashok.dumbre@xilinx.com>
Date: Fri, 3 Dec 2021 21:23:54 +0000
Subject: device property: Add fwnode_iomap()

This patch introduces a new helper routine - fwnode_iomap(), which
allows to map the memory mapped IO for a given device node.

This implementation does not cover the ACPI case and may be expanded
in the future. The main purpose here is to be able to develop resource
provider agnostic drivers.

Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Anand Ashok Dumbre <anand.ashok.dumbre@xilinx.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/r/20211203212358.31444-2-anand.ashok.dumbre@xilinx.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/property.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/property.h b/include/linux/property.h
index 88fa726a76df..6670d5a1ec2a 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -122,6 +122,8 @@ void fwnode_handle_put(struct fwnode_handle *fwnode);
 
 int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index);
 
+void __iomem *fwnode_iomap(struct fwnode_handle *fwnode, int index);
+
 unsigned int device_get_child_node_count(struct device *dev);
 
 static inline bool device_property_read_bool(struct device *dev,
-- 
cgit v1.2.3


From 1b0b6cc8030d08d2a24e9e5f85dc36c5a58200ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Wed, 24 Nov 2021 00:27:01 +0100
Subject: power: supply: add charge_behaviour attributes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This a revised version of
"[RFC] add standardized attributes for force_discharge and inhibit_charge" [0],
incorporating discussion results.

The biggest change is the switch from two boolean attributes to a single
enum attribute.

[0] https://lore.kernel.org/platform-driver-x86/21569a89-8303-8573-05fb-c2fec29983d1@gmail.com/

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Acked-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Link: https://lore.kernel.org/r/20211123232704.25394-2-linux@weissschuh.net
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 include/linux/power_supply.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 9ca1f120a211..70c333e86293 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -132,6 +132,7 @@ enum power_supply_property {
 	POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX,
 	POWER_SUPPLY_PROP_CHARGE_CONTROL_START_THRESHOLD, /* in percents! */
 	POWER_SUPPLY_PROP_CHARGE_CONTROL_END_THRESHOLD, /* in percents! */
+	POWER_SUPPLY_PROP_CHARGE_BEHAVIOUR,
 	POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT,
 	POWER_SUPPLY_PROP_INPUT_VOLTAGE_LIMIT,
 	POWER_SUPPLY_PROP_INPUT_POWER_LIMIT,
@@ -202,6 +203,12 @@ enum power_supply_usb_type {
 	POWER_SUPPLY_USB_TYPE_APPLE_BRICK_ID,	/* Apple Charging Method */
 };
 
+enum power_supply_charge_behaviour {
+	POWER_SUPPLY_CHARGE_BEHAVIOUR_AUTO = 0,
+	POWER_SUPPLY_CHARGE_BEHAVIOUR_INHIBIT_CHARGE,
+	POWER_SUPPLY_CHARGE_BEHAVIOUR_FORCE_DISCHARGE,
+};
+
 enum power_supply_notifier_events {
 	PSY_EVENT_PROP_CHANGED,
 };
-- 
cgit v1.2.3


From 539b9c94ac83563842a27e8cc3de5164b15c4de0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Wed, 24 Nov 2021 00:27:02 +0100
Subject: power: supply: add helpers for charge_behaviour sysfs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These helper functions can be used by drivers to implement their own
sysfs-attributes.
This is useful for ACPI-drivers extending the default ACPI-battery with
their own charge_behaviour attributes.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Acked-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Link: https://lore.kernel.org/r/20211123232704.25394-3-linux@weissschuh.net
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 include/linux/power_supply.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 70c333e86293..71f0379c2af8 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -546,4 +546,13 @@ static inline
 void power_supply_remove_hwmon_sysfs(struct power_supply *psy) {}
 #endif
 
+#ifdef CONFIG_SYSFS
+ssize_t power_supply_charge_behaviour_show(struct device *dev,
+					   unsigned int available_behaviours,
+					   enum power_supply_charge_behaviour behaviour,
+					   char *buf);
+
+int power_supply_charge_behaviour_parse(unsigned int available_behaviours, const char *buf);
+#endif
+
 #endif /* __LINUX_POWER_SUPPLY_H__ */
-- 
cgit v1.2.3


From 37ae5a0f5287a52cf51242e76ccf198d02ffe495 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Date: Sat, 18 Dec 2021 18:41:56 +0900
Subject: block: use "unsigned long" for blk_validate_block_size().

Since lo_simple_ioctl(LOOP_SET_BLOCK_SIZE) and ioctl(NBD_SET_BLKSIZE) pass
user-controlled "unsigned long arg" to blk_validate_block_size(),
"unsigned long" should be used for validation.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/9ecbf057-4375-c2db-ab53-e4cc0dff953d@i-love.sakura.ne.jp
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c80cfaefc0a8..bb5fb7282e6e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -45,7 +45,7 @@ struct blk_crypto_profile;
  */
 #define BLKCG_MAX_POLS		6
 
-static inline int blk_validate_block_size(unsigned int bsize)
+static inline int blk_validate_block_size(unsigned long bsize)
 {
 	if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
 		return -EINVAL;
-- 
cgit v1.2.3


From 5f174ec3c1d62013f86db6597249174d8cb227b2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 20 Sep 2021 21:32:49 +0000
Subject: logic_io instance of iounmap() needs volatile on argument

... same as the rest of implementations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 include/asm-generic/logic_io.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/logic_io.h b/include/asm-generic/logic_io.h
index a53116b8c57e..8a59b6e567df 100644
--- a/include/asm-generic/logic_io.h
+++ b/include/asm-generic/logic_io.h
@@ -34,7 +34,7 @@
 void __iomem *ioremap(phys_addr_t offset, size_t size);
 
 #define iounmap iounmap
-void iounmap(void __iomem *addr);
+void iounmap(void volatile __iomem *addr);
 
 #define __raw_readb __raw_readb
 u8 __raw_readb(const volatile void __iomem *addr);
-- 
cgit v1.2.3


From cb8747b7d2a9e3d687a19a007575071d4b71cd05 Mon Sep 17 00:00:00 2001
From: Ismael Luceno <ismael@iodev.co.uk>
Date: Mon, 15 Nov 2021 14:46:47 +0100
Subject: uapi: Fix undefined __always_inline on non-glibc systems

This macro is defined by glibc itself, which makes the issue go unnoticed on
those systems.  On non-glibc systems it causes build failures on several
utilities and libraries, like bpftool and objtool.

Fixes: 1d509f2a6ebc ("x86/insn: Support big endian cross-compiles")
Fixes: 2d7ce0e8a704 ("tools/virtio: more stubs")
Fixes: 3fb321fde22d ("selftests/net: ipv6 flowlabel")
Fixes: 50b3ed57dee9 ("selftests/bpf: test bpf flow dissection")
Fixes: 9cacf81f8161 ("bpf: Remove extra lock_sock for TCP_ZEROCOPY_RECEIVE")
Fixes: a4b2061242ec ("tools include uapi: Grab a copy of linux/in.h")
Fixes: b12d6ec09730 ("bpf: btf: add btf print functionality")
Fixes: c0dd967818a2 ("tools, include: Grab a copy of linux/erspan.h")
Fixes: c4b6014e8bb0 ("tools: Add copy of perf_event.h to tools/include/linux/")

Signed-off-by: Ismael Luceno <ismael@iodev.co.uk>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lore.kernel.org/r/20211115134647.1921-1-ismael@iodev.co.uk
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
---
 include/uapi/linux/byteorder/big_endian.h    | 1 +
 include/uapi/linux/byteorder/little_endian.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/byteorder/big_endian.h b/include/uapi/linux/byteorder/big_endian.h
index 2199adc6a6c2..80aa5c41a763 100644
--- a/include/uapi/linux/byteorder/big_endian.h
+++ b/include/uapi/linux/byteorder/big_endian.h
@@ -9,6 +9,7 @@
 #define __BIG_ENDIAN_BITFIELD
 #endif
 
+#include <linux/stddef.h>
 #include <linux/types.h>
 #include <linux/swab.h>
 
diff --git a/include/uapi/linux/byteorder/little_endian.h b/include/uapi/linux/byteorder/little_endian.h
index 601c904fd5cd..cd98982e7523 100644
--- a/include/uapi/linux/byteorder/little_endian.h
+++ b/include/uapi/linux/byteorder/little_endian.h
@@ -9,6 +9,7 @@
 #define __LITTLE_ENDIAN_BITFIELD
 #endif
 
+#include <linux/stddef.h>
 #include <linux/types.h>
 #include <linux/swab.h>
 
-- 
cgit v1.2.3


From dcce50e6cc4d86a63dc0a9a6ee7d4f948ccd53a1 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 8 Nov 2021 14:35:59 -0800
Subject: compiler.h: Fix annotation macro misplacement with Clang

When building with Clang and CONFIG_TRACE_BRANCH_PROFILING, there are a
lot of unreachable warnings, like:

  arch/x86/kernel/traps.o: warning: objtool: handle_xfd_event()+0x134: unreachable instruction

Without an input to the inline asm, 'volatile' is ignored for some
reason and Clang feels free to move the reachable() annotation away from
its intended location.

Fix that by re-adding the counter value to the inputs.

Fixes: f1069a8756b9 ("compiler.h: Avoid using inline asm operand modifiers")
Fixes: c199f64ff93c ("instrumentation.h: Avoid using inline asm operand modifiers")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lore.kernel.org/r/0417e96909b97a406323409210de7bf13df0b170.1636410380.git.jpoimboe@redhat.com
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: x86@kernel.org
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Miroslav Benes <mbenes@suse.cz>
---
 include/linux/compiler.h        | 4 ++--
 include/linux/instrumentation.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 3d5af56337bd..429dcebe2b99 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -121,7 +121,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 	asm volatile(__stringify_label(c) ":\n\t"			\
 		     ".pushsection .discard.reachable\n\t"		\
 		     ".long " __stringify_label(c) "b - .\n\t"		\
-		     ".popsection\n\t");				\
+		     ".popsection\n\t" : : "i" (c));			\
 })
 #define annotate_reachable() __annotate_reachable(__COUNTER__)
 
@@ -129,7 +129,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 	asm volatile(__stringify_label(c) ":\n\t"			\
 		     ".pushsection .discard.unreachable\n\t"		\
 		     ".long " __stringify_label(c) "b - .\n\t"		\
-		     ".popsection\n\t");				\
+		     ".popsection\n\t" : : "i" (c));			\
 })
 #define annotate_unreachable() __annotate_unreachable(__COUNTER__)
 
diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h
index fa2cd8c63dcc..24359b4a9605 100644
--- a/include/linux/instrumentation.h
+++ b/include/linux/instrumentation.h
@@ -11,7 +11,7 @@
 	asm volatile(__stringify(c) ": nop\n\t"				\
 		     ".pushsection .discard.instr_begin\n\t"		\
 		     ".long " __stringify(c) "b - .\n\t"		\
-		     ".popsection\n\t");				\
+		     ".popsection\n\t" : : "i" (c));			\
 })
 #define instrumentation_begin() __instrumentation_begin(__COUNTER__)
 
@@ -50,7 +50,7 @@
 	asm volatile(__stringify(c) ": nop\n\t"				\
 		     ".pushsection .discard.instr_end\n\t"		\
 		     ".long " __stringify(c) "b - .\n\t"		\
-		     ".popsection\n\t");				\
+		     ".popsection\n\t" : : "i" (c));			\
 })
 #define instrumentation_end() __instrumentation_end(__COUNTER__)
 #else
-- 
cgit v1.2.3


From 47402385d0b18ba46c6bfc4cff073c9359571fe4 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Thu, 9 Dec 2021 12:09:24 +0200
Subject: devlink: Add new "io_eq_size" generic device param

Add new device generic parameter to determine the size of the
I/O completion EQs.

For example, to reduce I/O EQ size to 64, execute:
$ devlink dev param set pci/0000:06:00.0 \
              name io_eq_size value 64 cmode driverinit
$ devlink dev reload pci/0000:06:00.0

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/net/devlink.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 3276a29f2b81..b5f4acd0e0cd 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -459,6 +459,7 @@ enum devlink_param_generic_id {
 	DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
 	DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
 	DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP,
+	DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
 
 	/* add new param generic ids above here*/
 	__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -511,6 +512,9 @@ enum devlink_param_generic_id {
 #define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME "enable_iwarp"
 #define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE DEVLINK_PARAM_TYPE_BOOL
 
+#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME "io_eq_size"
+#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32
+
 #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)	\
 {									\
 	.id = DEVLINK_PARAM_GENERIC_ID_##_id,				\
-- 
cgit v1.2.3


From 0b5705ebc355e79b71bf946058acec6820e27869 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Thu, 9 Dec 2021 12:09:26 +0200
Subject: devlink: Add new "event_eq_size" generic device param

Add new device generic parameter to determine the size of the
asynchronous control events EQ.

For example, to reduce event EQ size to 64, execute:
$ devlink dev param set pci/0000:06:00.0 \
              name event_eq_size value 64 cmode driverinit
$ devlink dev reload pci/0000:06:00.0

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/net/devlink.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index b5f4acd0e0cd..8d5349d2fb68 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -460,6 +460,7 @@ enum devlink_param_generic_id {
 	DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
 	DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP,
 	DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
+	DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
 
 	/* add new param generic ids above here*/
 	__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -515,6 +516,9 @@ enum devlink_param_generic_id {
 #define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME "io_eq_size"
 #define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32
 
+#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME "event_eq_size"
+#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32
+
 #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)	\
 {									\
 	.id = DEVLINK_PARAM_GENERIC_ID_##_id,				\
-- 
cgit v1.2.3


From f857acfc457ea63fa5b862d77f055665d863acfe Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 17 Nov 2021 14:53:48 -0700
Subject: lib/scatterlist: cleanup macros into static inline functions

Convert the sg_is_chain(), sg_is_last() and sg_chain_ptr() macros
into static inline functions. There's no reason for these to be macros
and static inline are generally preferred these days.

Also introduce the SG_PAGE_LINK_MASK define so the P2PDMA work, which is
adding another bit to this mask, can do so more easily.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/scatterlist.h | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 266754a55327..7ff9d6386c12 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -69,10 +69,27 @@ struct sg_append_table {
  * a valid sg entry, or whether it points to the start of a new scatterlist.
  * Those low bits are there for everyone! (thanks mason :-)
  */
-#define sg_is_chain(sg)		((sg)->page_link & SG_CHAIN)
-#define sg_is_last(sg)		((sg)->page_link & SG_END)
-#define sg_chain_ptr(sg)	\
-	((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
+#define SG_PAGE_LINK_MASK (SG_CHAIN | SG_END)
+
+static inline unsigned int __sg_flags(struct scatterlist *sg)
+{
+	return sg->page_link & SG_PAGE_LINK_MASK;
+}
+
+static inline struct scatterlist *sg_chain_ptr(struct scatterlist *sg)
+{
+	return (struct scatterlist *)(sg->page_link & ~SG_PAGE_LINK_MASK);
+}
+
+static inline bool sg_is_chain(struct scatterlist *sg)
+{
+	return __sg_flags(sg) & SG_CHAIN;
+}
+
+static inline bool sg_is_last(struct scatterlist *sg)
+{
+	return __sg_flags(sg) & SG_END;
+}
 
 /**
  * sg_assign_page - Assign a given page to an SG entry
@@ -92,7 +109,7 @@ static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
 	 * In order for the low bit stealing approach to work, pages
 	 * must be aligned at a 32-bit boundary as a minimum.
 	 */
-	BUG_ON((unsigned long) page & (SG_CHAIN | SG_END));
+	BUG_ON((unsigned long)page & SG_PAGE_LINK_MASK);
 #ifdef CONFIG_DEBUG_SG
 	BUG_ON(sg_is_chain(sg));
 #endif
@@ -126,7 +143,7 @@ static inline struct page *sg_page(struct scatterlist *sg)
 #ifdef CONFIG_DEBUG_SG
 	BUG_ON(sg_is_chain(sg));
 #endif
-	return (struct page *)((sg)->page_link & ~(SG_CHAIN | SG_END));
+	return (struct page *)((sg)->page_link & ~SG_PAGE_LINK_MASK);
 }
 
 /**
-- 
cgit v1.2.3


From d5624bb29f49b849ac8d1e9783dbf9c65cf33457 Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Tue, 21 Dec 2021 11:55:56 +0800
Subject: asm-generic: introduce io_stop_wc() and add implementation for ARM64

For memory accesses with write-combining attributes (e.g. those returned
by ioremap_wc()), the CPU may wait for prior accesses to be merged with
subsequent ones. But in some situation, such wait is bad for the
performance.

We introduce io_stop_wc() to prevent the merging of write-combining
memory accesses before this macro with those after it.

We add implementation for ARM64 using DGH instruction and provide NOP
implementation for other architectures.

Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Suggested-by: Will Deacon <will@kernel.org>
Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20211221035556.60346-1-wangxiongfeng2@huawei.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/asm-generic/barrier.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 640f09479bdf..4c2c1b830344 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -251,5 +251,16 @@ do {									\
 #define pmem_wmb()	wmb()
 #endif
 
+/*
+ * ioremap_wc() maps I/O memory as memory with write-combining attributes. For
+ * this kind of memory accesses, the CPU may wait for prior accesses to be
+ * merged with subsequent ones. In some situation, such wait is bad for the
+ * performance. io_stop_wc() can be used to prevent the merging of
+ * write-combining memory accesses before this macro with those after it.
+ */
+#ifndef io_stop_wc
+#define io_stop_wc do { } while (0)
+#endif
+
 #endif /* !__ASSEMBLY__ */
 #endif /* __ASM_GENERIC_BARRIER_H */
-- 
cgit v1.2.3


From 80b3485f7d7bdb2468f2a9d6a346a1132d248309 Mon Sep 17 00:00:00 2001
From: "David E. Box" <david.e.box@linux.intel.com>
Date: Tue, 7 Dec 2021 17:50:10 -0800
Subject: PCI: Add #defines for accessing PCIe DVSEC fields

Add #defines for accessing Vendor ID, Revision, Length, and ID offsets
in the Designated Vendor Specific Extended Capability (DVSEC). Defined
in PCIe r5.0, sec 7.9.6.

Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: David E. Box <david.e.box@linux.intel.com>
Link: https://lore.kernel.org/r/20211208015015.891275-2-david.e.box@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/pci_regs.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index ff6ccbc6efe9..318f3f1f9e92 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1086,7 +1086,11 @@
 
 /* Designated Vendor-Specific (DVSEC, PCI_EXT_CAP_ID_DVSEC) */
 #define PCI_DVSEC_HEADER1		0x4 /* Designated Vendor-Specific Header1 */
+#define  PCI_DVSEC_HEADER1_VID(x)	((x) & 0xffff)
+#define  PCI_DVSEC_HEADER1_REV(x)	(((x) >> 16) & 0xf)
+#define  PCI_DVSEC_HEADER1_LEN(x)	(((x) >> 20) & 0xfff)
 #define PCI_DVSEC_HEADER2		0x8 /* Designated Vendor-Specific Header2 */
+#define  PCI_DVSEC_HEADER2_ID(x)		((x) & 0xffff)
 
 /* Data Link Feature */
 #define PCI_DLF_CAP		0x04	/* Capabilities Register */
-- 
cgit v1.2.3


From 365481e42a8a95c55e43e8cc236138718e762e7b Mon Sep 17 00:00:00 2001
From: "David E. Box" <david.e.box@linux.intel.com>
Date: Tue, 7 Dec 2021 17:50:11 -0800
Subject: driver core: auxiliary bus: Add driver data helpers

Adds get/set driver data helpers for auxiliary devices.

Reviewed-by: Mark Gross <markgross@kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: David E. Box <david.e.box@linux.intel.com>
Link: https://lore.kernel.org/r/20211208015015.891275-3-david.e.box@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/auxiliary_bus.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h
index e6d8b5c16226..de21d9d24a95 100644
--- a/include/linux/auxiliary_bus.h
+++ b/include/linux/auxiliary_bus.h
@@ -188,6 +188,16 @@ struct auxiliary_driver {
 	const struct auxiliary_device_id *id_table;
 };
 
+static inline void *auxiliary_get_drvdata(struct auxiliary_device *auxdev)
+{
+	return dev_get_drvdata(&auxdev->dev);
+}
+
+static inline void auxiliary_set_drvdata(struct auxiliary_device *auxdev, void *data)
+{
+	dev_set_drvdata(&auxdev->dev, data);
+}
+
 static inline struct auxiliary_device *to_auxiliary_dev(struct device *dev)
 {
 	return container_of(dev, struct auxiliary_device, dev);
-- 
cgit v1.2.3


From b398123bff3bcbc1facb0f29bf6e7b9f1bc55931 Mon Sep 17 00:00:00 2001
From: Pingfan Liu <kernelfans@gmail.com>
Date: Wed, 15 Dec 2021 10:13:48 +0800
Subject: efi: apply memblock cap after memblock_add()

On arm64, during kdump kernel saves vmcore, it runs into the following bug:
...
[   15.148919] usercopy: Kernel memory exposure attempt detected from SLUB object 'kmem_cache_node' (offset 0, size 4096)!
[   15.159707] ------------[ cut here ]------------
[   15.164311] kernel BUG at mm/usercopy.c:99!
[   15.168482] Internal error: Oops - BUG: 0 [#1] SMP
[   15.173261] Modules linked in: xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce sbsa_gwdt ast i2c_algo_bit drm_vram_helper drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops cec drm_ttm_helper ttm drm nvme nvme_core xgene_hwmon i2c_designware_platform i2c_designware_core dm_mirror dm_region_hash dm_log dm_mod overlay squashfs zstd_decompress loop
[   15.206186] CPU: 0 PID: 542 Comm: cp Not tainted 5.16.0-rc4 #1
[   15.212006] Hardware name: GIGABYTE R272-P30-JG/MP32-AR0-JG, BIOS F12 (SCP: 1.5.20210426) 05/13/2021
[   15.221125] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[   15.228073] pc : usercopy_abort+0x9c/0xa0
[   15.232074] lr : usercopy_abort+0x9c/0xa0
[   15.236070] sp : ffff8000121abba0
[   15.239371] x29: ffff8000121abbb0 x28: 0000000000003000 x27: 0000000000000000
[   15.246494] x26: 0000000080000400 x25: 0000ffff885c7000 x24: 0000000000000000
[   15.253617] x23: 000007ff80400000 x22: ffff07ff80401000 x21: 0000000000000001
[   15.260739] x20: 0000000000001000 x19: ffff07ff80400000 x18: ffffffffffffffff
[   15.267861] x17: 656a626f2042554c x16: 53206d6f72662064 x15: 6574636574656420
[   15.274983] x14: 74706d6574746120 x13: 2129363930342065 x12: 7a6973202c302074
[   15.282105] x11: ffffc8b041d1b148 x10: 00000000ffff8000 x9 : ffffc8b04012812c
[   15.289228] x8 : 00000000ffff7fff x7 : ffffc8b041d1b148 x6 : 0000000000000000
[   15.296349] x5 : 0000000000000000 x4 : 0000000000007fff x3 : 0000000000000000
[   15.303471] x2 : 0000000000000000 x1 : ffff07ff8c064800 x0 : 000000000000006b
[   15.310593] Call trace:
[   15.313027]  usercopy_abort+0x9c/0xa0
[   15.316677]  __check_heap_object+0xd4/0xf0
[   15.320762]  __check_object_size.part.0+0x160/0x1e0
[   15.325628]  __check_object_size+0x2c/0x40
[   15.329711]  copy_oldmem_page+0x7c/0x140
[   15.333623]  read_from_oldmem.part.0+0xfc/0x1c0
[   15.338142]  __read_vmcore.constprop.0+0x23c/0x350
[   15.342920]  read_vmcore+0x28/0x34
[   15.346309]  proc_reg_read+0xb4/0xf0
[   15.349871]  vfs_read+0xb8/0x1f0
[   15.353088]  ksys_read+0x74/0x100
[   15.356390]  __arm64_sys_read+0x28/0x34
...

This bug introduced by commit b261dba2fdb2 ("arm64: kdump: Remove custom
linux,usable-memory-range handling"), which moves
memblock_cap_memory_range() to fdt, but it breaches the rules that
memblock_cap_memory_range() should come after memblock_add() etc as said
in commit e888fa7bb882 ("memblock: Check memory add/cap ordering").

As a consequence, the virtual address set up by copy_oldmem_page() does
not bail out from the test of virt_addr_valid() in check_heap_object(),
and finally hits the BUG_ON().

Since memblock allocator has no idea about when the memblock is fully
populated, while efi_init() is aware, so tackling this issue by calling the
interface early_init_dt_check_for_usable_mem_range() exposed by of/fdt.

Fixes: b261dba2fdb2 ("arm64: kdump: Remove custom linux,usable-memory-range handling")
Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Zhen Lei <thunder.leizhen@huawei.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Nick Terrell <terrelln@fb.com>
Cc: linux-arm-kernel@lists.infradead.org
To: devicetree@vger.kernel.org
To: linux-efi@vger.kernel.org
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20211215021348.8766-1-kernelfans@gmail.com
---
 include/linux/of_fdt.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index cf48983d3c86..ad09beb6d13c 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -62,6 +62,7 @@ extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
 				     int depth, void *data);
 extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
 				     int depth, void *data);
+extern void early_init_dt_check_for_usable_mem_range(void);
 extern int early_init_dt_scan_chosen_stdout(void);
 extern void early_init_fdt_scan_reserved_mem(void);
 extern void early_init_fdt_reserve_self(void);
@@ -86,6 +87,7 @@ extern void unflatten_and_copy_device_tree(void);
 extern void early_init_devtree(void *);
 extern void early_get_first_memblock_info(void *, phys_addr_t *);
 #else /* CONFIG_OF_EARLY_FLATTREE */
+static inline void early_init_dt_check_for_usable_mem_range(void) {}
 static inline int early_init_dt_scan_chosen_stdout(void) { return -ENODEV; }
 static inline void early_init_fdt_scan_reserved_mem(void) {}
 static inline void early_init_fdt_reserve_self(void) {}
-- 
cgit v1.2.3


From f2f8115fe8b390af27d013411045bd712a812103 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@kernel.org>
Date: Tue, 21 Dec 2021 15:17:56 +0200
Subject: memory: omap-gpmc: Use a compatible match table when checking for
 NAND controller

As more compatibles can be added to the GPMC NAND controller driver
use a compatible match table.

Signed-off-by: Roger Quadros <rogerq@kernel.org>
Acked-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/r/20211221131757.2030-4-rogerq@kernel.org
[krzysztof: remove "is_nand" variable]
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
---
 include/linux/platform_data/mtd-nand-omap2.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h
index de6ada739121..92f011805ad4 100644
--- a/include/linux/platform_data/mtd-nand-omap2.h
+++ b/include/linux/platform_data/mtd-nand-omap2.h
@@ -7,6 +7,7 @@
 #define	_MTD_NAND_OMAP2_H
 
 #include <linux/mtd/partitions.h>
+#include <linux/mod_devicetable.h>
 
 #define	GPMC_BCH_NUM_REMAINDER	8
 
@@ -61,4 +62,10 @@ struct gpmc_nand_regs {
 	void __iomem	*gpmc_bch_result5[GPMC_BCH_NUM_REMAINDER];
 	void __iomem	*gpmc_bch_result6[GPMC_BCH_NUM_REMAINDER];
 };
-#endif
+
+static const struct of_device_id omap_nand_ids[] = {
+	{ .compatible = "ti,omap2-nand", },
+	{},
+};
+
+#endif /* _MTD_NAND_OMAP2_H */
-- 
cgit v1.2.3


From d7f55471db2719629f773c2d6b5742a69595bfd3 Mon Sep 17 00:00:00 2001
From: Jackie Liu <liuyun01@kylinos.cn>
Date: Fri, 17 Dec 2021 10:07:54 +0800
Subject: memblock: fix memblock_phys_alloc() section mismatch error

Fix modpost Section mismatch error in memblock_phys_alloc()

[...]
WARNING: modpost: vmlinux.o(.text.unlikely+0x1dcc): Section mismatch in reference
from the function memblock_phys_alloc() to the function .init.text:memblock_phys_alloc_range()
The function memblock_phys_alloc() references
the function __init memblock_phys_alloc_range().
This is often because memblock_phys_alloc lacks a __init
annotation or the annotation of memblock_phys_alloc_range is wrong.

ERROR: modpost: Section mismatches detected.
Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them.
[...]

memblock_phys_alloc() is a one-line wrapper, make it __always_inline to
avoid these section mismatches.

Reported-by: k2ci <kernel-bot@kylinos.cn>
Suggested-by: Mike Rapoport <rppt@kernel.org>
Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
[rppt: slightly massaged changelog ]
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/20211217020754.2874872-1-liu.yun@linux.dev
---
 include/linux/memblock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 8adcf1fa8096..9dc7cb239d21 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -405,8 +405,8 @@ phys_addr_t memblock_alloc_range_nid(phys_addr_t size,
 				      phys_addr_t end, int nid, bool exact_nid);
 phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid);
 
-static inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
-					      phys_addr_t align)
+static __always_inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
+						       phys_addr_t align)
 {
 	return memblock_phys_alloc_range(size, align, 0,
 					 MEMBLOCK_ALLOC_ACCESSIBLE);
-- 
cgit v1.2.3


From 744451c162a514044a912cbbd64b7a386035cc5b Mon Sep 17 00:00:00 2001
From: Benjamin Berg <bberg@redhat.com>
Date: Fri, 17 Dec 2021 16:28:09 +0100
Subject: Bluetooth: hci_sync: Push sync command cancellation to workqueue

syzbot reported that hci_cmd_sync_cancel may sleep from the wrong
context. To avoid this, create a new work item that pushes the relevant
parts into a different context.

Note that we keep the old implementation with the name
__hci_cmd_sync_cancel as the sleeping behaviour is desired in some
cases.

Reported-and-tested-by: syzbot+485cc00ea7cf41dfdbf1@syzkaller.appspotmail.com
Fixes: c97a747efc93 ("Bluetooth: btusb: Cancel sync commands for certain URB errors")
Signed-off-by: Benjamin Berg <bberg@redhat.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 1 +
 include/net/bluetooth/hci_sync.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 4d69dcfebd63..6509109c2413 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -480,6 +480,7 @@ struct hci_dev {
 	struct work_struct	cmd_sync_work;
 	struct list_head	cmd_sync_work_list;
 	struct mutex		cmd_sync_work_lock;
+	struct work_struct	cmd_sync_cancel_work;
 
 	__u16			discov_timeout;
 	struct delayed_work	discov_off;
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index f4034bf8f1ce..435674cf388e 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -38,6 +38,7 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
 void hci_cmd_sync_init(struct hci_dev *hdev);
 void hci_cmd_sync_clear(struct hci_dev *hdev);
 void hci_cmd_sync_cancel(struct hci_dev *hdev, int err);
+void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err);
 
 int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
 		       void *data, hci_cmd_sync_work_destroy_t destroy);
-- 
cgit v1.2.3


From 8e8b92ee60de5341e9db83c11f75a525e555e2b3 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 22 Dec 2021 12:21:56 -0800
Subject: Bluetooth: hci_sync: Add hci_le_create_conn_sync

This adds hci_le_create_conn_sync and make hci_le_connect use it instead
of queueing multiple commands which may conflict with the likes of
hci_update_passive_scan which uses hci_cmd_sync_queue.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 3 +--
 include/net/bluetooth/hci_sync.h | 4 ++++
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 6509109c2413..1ab94960764e 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1121,8 +1121,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
 				     enum conn_reasons conn_reason);
 struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 				u8 dst_type, bool dst_resolved, u8 sec_level,
-				u16 conn_timeout, u8 role,
-				bdaddr_t *direct_rpa);
+				u16 conn_timeout, u8 role);
 struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
 				 u8 sec_level, u8 auth_type,
 				 enum conn_reasons conn_reason);
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 435674cf388e..2492e3b46a8f 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -102,3 +102,7 @@ int hci_stop_discovery_sync(struct hci_dev *hdev);
 
 int hci_suspend_sync(struct hci_dev *hdev);
 int hci_resume_sync(struct hci_dev *hdev);
+
+struct hci_conn;
+
+int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn);
-- 
cgit v1.2.3


From 85b56857e194635b772be8af1c7650535d5d112a Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 22 Dec 2021 12:21:57 -0800
Subject: Bluetooth: hci_sync: Add support for waiting specific LE subevents

This adds support for waiting for specific LE subevents instead of
command status which may only indicate that the commands is in progress
and a different event is used to complete the operation.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 77906832353f..4b3d0b16c185 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -412,6 +412,7 @@ struct bt_skb_cb {
 #define hci_skb_pkt_type(skb) bt_cb((skb))->pkt_type
 #define hci_skb_expect(skb) bt_cb((skb))->expect
 #define hci_skb_opcode(skb) bt_cb((skb))->hci.opcode
+#define hci_skb_event(skb) bt_cb((skb))->hci.req_event
 #define hci_skb_sk(skb) bt_cb((skb))->hci.sk
 
 static inline struct sk_buff *bt_skb_alloc(unsigned int len, gfp_t how)
-- 
cgit v1.2.3


From 4fc9857ab8c6cfe2152df3288c8cf3300b929f1a Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 22 Dec 2021 12:21:59 -0800
Subject: Bluetooth: hci_sync: Add check simultaneous roles support

This attempts to check if the controller can act as both central and
peripheral simultaneously and in case it does skip suspending
advertising or in case of directed advertising don't fail if scanning.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 1ab94960764e..586f69d084a2 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -783,6 +783,12 @@ extern struct mutex hci_cb_list_lock;
 		hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT);	\
 	} while (0)
 
+#define hci_dev_le_state_simultaneous(hdev) \
+	(test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) && \
+	 (hdev->le_states[4] & 0x08) &&	/* Central */ \
+	 (hdev->le_states[4] & 0x40) &&	/* Peripheral */ \
+	 (hdev->le_states[3] & 0x10))	/* Simultaneous */
+
 /* ----- HCI interface to upper protocols ----- */
 int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr);
 int l2cap_disconn_ind(struct hci_conn *hcon);
-- 
cgit v1.2.3


From 76d0685bbac8ae017e5d12eba25fb3c4f0ec77ac Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 22 Dec 2021 12:22:00 -0800
Subject: Bluetooth: MGMT: Fix LE simultaneous roles UUID if not supported

If controller/driver don't support LE simultaneous roles its UUID shall
be omitted when responding to MGMT_OP_READ_EXP_FEATURES_INFO.

This also rework the support introducing HCI_LE_SIMULTANEOUS_ROLES flag
so it can be detected when userspace wants to use or not.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 7f5f00ff53da..e2b06bb79e2e 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -341,6 +341,7 @@ enum {
 	HCI_FORCE_NO_MITM,
 	HCI_QUALITY_REPORT,
 	HCI_OFFLOAD_CODECS_ENABLED,
+	HCI_LE_SIMULTANEOUS_ROLES,
 
 	__HCI_NUM_FLAGS,
 };
-- 
cgit v1.2.3


From 15fcb1031178f2a42425c2993b2ec7bb894c04d6 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 21 Dec 2021 11:39:40 -0800
Subject: codel: remove unnecessary sock.h include

Since sock.h is modified relatively often (60 times in the last
12 months) it seems worthwhile to decrease the incremental build
work.

CoDel's header includes net/inet_ecn.h which in turn includes net/sock.h.
codel.h is itself included by mac80211 which is included by much of
the WiFi stack and drivers. Removing the net/inet_ecn.h include from
CoDel breaks the dependecy between WiFi and sock.h.

Commit d068ca2ae2e6 ("codel: split into multiple files") moved all
the code which actually needs ECN helpers out to net/codel_impl.h,
the include can be moved there as well.

This decreases the incremental build size after touching sock.h
from 4999 objects to 4051 objects.

Fix unmasked missing includes in WiFi drivers.

Acked-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/20211221193941.3805147-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/codel.h      | 1 -
 include/net/codel_impl.h | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/codel.h b/include/net/codel.h
index a6c9e34e62b8..d74dd8fda54e 100644
--- a/include/net/codel.h
+++ b/include/net/codel.h
@@ -45,7 +45,6 @@
 #include <linux/ktime.h>
 #include <linux/skbuff.h>
 #include <net/pkt_sched.h>
-#include <net/inet_ecn.h>
 
 /* Controlling Queue Delay (CoDel) algorithm
  * =========================================
diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h
index 137d40d8cbeb..78a27ac73070 100644
--- a/include/net/codel_impl.h
+++ b/include/net/codel_impl.h
@@ -49,6 +49,8 @@
  * Implemented on linux by Dave Taht and Eric Dumazet
  */
 
+#include <net/inet_ecn.h>
+
 static void codel_params_init(struct codel_params *params)
 {
 	params->interval = MS2TIME(100);
-- 
cgit v1.2.3


From e6e5904455815626b711c7d48cacd253f4d72f84 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 21 Dec 2021 11:39:41 -0800
Subject: codel: remove unnecessary pkt_sched.h include

Commit d068ca2ae2e6 ("codel: split into multiple files") moved all
Qdisc-related code to codel_qdisc.h, move the include of pkt_sched.h
as well.

This is similar to the previous commit, although we don't care as
much about incremental builds after pkt_sched.h was touched itself
it is included by net/sch_generic.h which is modified ~20 times
a year.

This decreases the incremental build size after touching pkt_sched.h
from 1592 to 617 objects.

Fix unmasked missing includes in WiFi drivers.

Acked-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/20211221193941.3805147-2-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/codel.h       | 1 -
 include/net/codel_qdisc.h | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/codel.h b/include/net/codel.h
index d74dd8fda54e..5fed2f16cb8d 100644
--- a/include/net/codel.h
+++ b/include/net/codel.h
@@ -44,7 +44,6 @@
 #include <linux/types.h>
 #include <linux/ktime.h>
 #include <linux/skbuff.h>
-#include <net/pkt_sched.h>
 
 /* Controlling Queue Delay (CoDel) algorithm
  * =========================================
diff --git a/include/net/codel_qdisc.h b/include/net/codel_qdisc.h
index 098630f83a55..58b6d0ebea10 100644
--- a/include/net/codel_qdisc.h
+++ b/include/net/codel_qdisc.h
@@ -49,6 +49,8 @@
  * Implemented on linux by Dave Taht and Eric Dumazet
  */
 
+#include <net/pkt_sched.h>
+
 /* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */
 struct codel_skb_cb {
 	codel_time_t enqueue_time;
-- 
cgit v1.2.3


From 6fd3c510ee4b37f2f9fe3d3cafbfa459e15c5e11 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 22 Dec 2021 13:15:32 -0800
Subject: bio.h: fix kernel-doc warnings

Fix all kernel-doc warnings in <linux/bio.h>:

include/linux/bio.h:136: warning: Function parameter or member 'nbytes' not described in 'bio_advance'
include/linux/bio.h:136: warning: Excess function parameter 'bytes' description in 'bio_advance'
include/linux/bio.h:391: warning: No description found for return value of 'bio_next_split'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Link: https://lore.kernel.org/r/20211222211532.24060-1-rdunlap@infradead.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index fe6bdfbbef66..0a41efe02208 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -124,7 +124,7 @@ void __bio_advance(struct bio *, unsigned bytes);
 /**
  * bio_advance - increment/complete a bio by some number of bytes
  * @bio:	bio to advance
- * @bytes:	number of bytes to complete
+ * @nbytes:	number of bytes to complete
  *
  * This updates bi_sector, bi_size and bi_idx; if the number of bytes to
  * complete doesn't align with a bvec boundary, then bv_len and bv_offset will
@@ -332,7 +332,7 @@ extern struct bio *bio_split(struct bio *bio, int sectors,
  * @gfp:	gfp mask
  * @bs:		bio set to allocate from
  *
- * Returns a bio representing the next @sectors of @bio - if the bio is smaller
+ * Return: a bio representing the next @sectors of @bio - if the bio is smaller
  * than @sectors, returns the original bio unchanged.
  */
 static inline struct bio *bio_next_split(struct bio *bio, int sectors,
-- 
cgit v1.2.3


From a16c7246368db8935652c805bc446928d0e1c0aa Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Wed, 22 Dec 2021 13:52:39 -0800
Subject: block: remove unnecessary trailing '\'

While harmless, the blank line is certainly not intended to be part of
the rq_list_for_each() macro. Remove it.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Link: https://lore.kernel.org/r/20211222215239.1768164-1-kbusch@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bb5fb7282e6e..22746b2d6825 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1363,7 +1363,7 @@ struct io_comp_batch {
 })
 
 #define rq_list_for_each(listptr, pos)			\
-	for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos)) \
+	for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos))
 
 #define rq_list_next(rq)	(rq)->rq_next
 #define rq_list_empty(list)	((list) == (struct request *) NULL)
-- 
cgit v1.2.3


From 023223dfbfb34fcc9b7dd41e21fbf9a5d5237989 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 17 Dec 2021 20:37:34 +0100
Subject: netfilter: nf_tables: make counter support built-in

Make counter support built-in to allow for direct call in case of
CONFIG_RETPOLINE.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_core.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 0fa5a6d98a00..b6fb1fdff9b2 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -7,6 +7,7 @@
 
 extern struct nft_expr_type nft_imm_type;
 extern struct nft_expr_type nft_cmp_type;
+extern struct nft_expr_type nft_counter_type;
 extern struct nft_expr_type nft_lookup_type;
 extern struct nft_expr_type nft_bitwise_type;
 extern struct nft_expr_type nft_byteorder_type;
@@ -21,6 +22,7 @@ extern struct nft_expr_type nft_last_type;
 #ifdef CONFIG_NETWORK_SECMARK
 extern struct nft_object_type nft_secmark_obj_type;
 #endif
+extern struct nft_object_type nft_counter_obj_type;
 
 int nf_tables_core_module_init(void);
 void nf_tables_core_module_exit(void);
@@ -120,6 +122,8 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
 bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
 			    const u32 *key, const struct nft_set_ext **ext);
 
+void nft_counter_init_seqcount(void);
+
 struct nft_expr;
 struct nft_regs;
 struct nft_pktinfo;
@@ -143,4 +147,6 @@ void nft_dynset_eval(const struct nft_expr *expr,
 		     struct nft_regs *regs, const struct nft_pktinfo *pkt);
 void nft_rt_get_eval(const struct nft_expr *expr,
 		     struct nft_regs *regs, const struct nft_pktinfo *pkt);
+void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs,
+                      const struct nft_pktinfo *pkt);
 #endif /* _NET_NF_TABLES_CORE_H */
-- 
cgit v1.2.3


From 4a6fbdd801e882ee6ca5cdfdc3374f0ae263174c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 17 Dec 2021 11:29:56 +0100
Subject: netfilter: conntrack: tag conntracks picked up in local out hook

This allows to identify flows that originate from local machine
in a followup patch.

It would be possible to make this a ->status bit instead.
For now I did not do that yet because I don't have a use-case for
exposing this info to userspace.

If one comes up the toggle can be replaced with a status bit.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index d24b0a34c8f0..871489df63c6 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -95,6 +95,7 @@ struct nf_conn {
 	unsigned long status;
 
 	u16		cpu;
+	u16		local_origin:1;
 	possible_net_t ct_net;
 
 #if IS_ENABLED(CONFIG_NF_NAT)
-- 
cgit v1.2.3


From fbefe22811c3140a686e407e114789ebf328a9a2 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Mon, 20 Dec 2021 19:21:24 +0800
Subject: scsi: libsas: Don't always drain event workqueue for HA resume

For the hisi_sas driver, if a directly attached disk is removed during
suspend, a hang will occur in the resume process:

The background is that in commit 16fd4a7c5917 ("scsi: hisi_sas: Add device
link between SCSI devices and hisi_hba"), it is ensured that the HBA device
cannot be runtime suspended when any SCSI device associated is active.

Other drivers which use libsas don't worry about this as none support
runtime suspend.

The mentioned hang occurs when an disk is removed during suspend. In the
removal process - from PHYE_RESUME_TIMEOUT event processing - we call into
scsi_remove_device(), which is being processed in the HA event workqueue.
Here we wait for all suppliers of the SCSI device to resume, which includes
the HBA device (from the above commit). However the HBA device cannot
resume, as it is waiting for the PHYE_RESUME_TIMEOUT to be processed (from
calling sas_resume_ha() -> sas_drain_work()). This is the deadlock.

There does not appear to be any need for the sas_drain_work() to be called
at all in sas_resume_ha() as it is not syncing against anything, so allow
LLDDs to avoid this by providing a variant of sas_resume_ha() which does
"sync", i.e. doesn't drain the event workqueue.

Link: https://lore.kernel.org/r/1639999298-244569-2-git-send-email-chenxiang66@hisilicon.com
Signed-off-by: John Garry <john.garry@huawei.com>
Signed-off-by: Xiang Chen <chenxiang66@hisilicon.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/scsi/libsas.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 79e4903bd414..a795a2d9e5b1 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -660,6 +660,7 @@ extern int sas_register_ha(struct sas_ha_struct *);
 extern int sas_unregister_ha(struct sas_ha_struct *);
 extern void sas_prep_resume_ha(struct sas_ha_struct *sas_ha);
 extern void sas_resume_ha(struct sas_ha_struct *sas_ha);
+extern void sas_resume_ha_no_sync(struct sas_ha_struct *sas_ha);
 extern void sas_suspend_ha(struct sas_ha_struct *sas_ha);
 
 int sas_set_phy_speed(struct sas_phy *phy, struct sas_phy_linkrates *rates);
-- 
cgit v1.2.3


From 6e1fcab00a23f7fe9f4fe9704905a790efa1eeab Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 20 Dec 2021 19:21:26 +0800
Subject: scsi: block: pm: Always set request queue runtime active in
 blk_post_runtime_resume()

John Garry reported a deadlock that occurs when trying to access a
runtime-suspended SATA device.  For obscure reasons, the rescan procedure
causes the link to be hard-reset, which disconnects the device.

The rescan tries to carry out a runtime resume when accessing the device.
scsi_rescan_device() holds the SCSI device lock and won't release it until
it can put commands onto the device's block queue.  This can't happen until
the queue is successfully runtime-resumed or the device is unregistered.
But the runtime resume fails because the device is disconnected, and
__scsi_remove_device() can't do the unregistration because it can't get the
device lock.

The best way to resolve this deadlock appears to be to allow the block
queue to start running again even after an unsuccessful runtime resume.
The idea is that the driver or the SCSI error handler will need to be able
to use the queue to resolve the runtime resume failure.

This patch removes the err argument to blk_post_runtime_resume() and makes
the routine act as though the resume was successful always.  This fixes the
deadlock.

Link: https://lore.kernel.org/r/1639999298-244569-4-git-send-email-chenxiang66@hisilicon.com
Fixes: e27829dc92e5 ("scsi: serialize ->rescan against ->remove")
Reported-and-tested-by: John Garry <john.garry@huawei.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Xiang Chen <chenxiang66@hisilicon.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/linux/blk-pm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blk-pm.h b/include/linux/blk-pm.h
index b80c65aba249..2580e05a8ab6 100644
--- a/include/linux/blk-pm.h
+++ b/include/linux/blk-pm.h
@@ -14,7 +14,7 @@ extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev);
 extern int blk_pre_runtime_suspend(struct request_queue *q);
 extern void blk_post_runtime_suspend(struct request_queue *q, int err);
 extern void blk_pre_runtime_resume(struct request_queue *q);
-extern void blk_post_runtime_resume(struct request_queue *q, int err);
+extern void blk_post_runtime_resume(struct request_queue *q);
 extern void blk_set_runtime_active(struct request_queue *q);
 #else
 static inline void blk_pm_runtime_init(struct request_queue *q,
-- 
cgit v1.2.3


From 4ea775abbb5c50c26edbf043d5a2ae7fde407f4a Mon Sep 17 00:00:00 2001
From: Xiang Chen <chenxiang66@hisilicon.com>
Date: Mon, 20 Dec 2021 19:21:33 +0800
Subject: scsi: libsas: Add flag SAS_HA_RESUMING

Add a flag SAS_HA_RESUMING and use it to indicate the state of resuming the
host controller.

Link: https://lore.kernel.org/r/1639999298-244569-11-git-send-email-chenxiang66@hisilicon.com
Reviewed-by: John Garry <john.garry@huawei.com>
Signed-off-by: Xiang Chen <chenxiang66@hisilicon.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/scsi/libsas.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index a795a2d9e5b1..698f2032807b 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -356,6 +356,7 @@ enum sas_ha_state {
 	SAS_HA_DRAINING,
 	SAS_HA_ATA_EH_ACTIVE,
 	SAS_HA_FROZEN,
+	SAS_HA_RESUMING,
 };
 
 struct sas_ha_struct {
-- 
cgit v1.2.3


From e6911affa416dc4e0c0b3f04cbe6b02ce13277f1 Mon Sep 17 00:00:00 2001
From: Xu Jia <xujia39@huawei.com>
Date: Wed, 22 Dec 2021 17:06:58 +0800
Subject: xfrm: Add support for SM3 secure hash

This patch allows IPsec to use SM3 HMAC authentication algorithm.

Signed-off-by: Xu Jia <xujia39@huawei.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/uapi/linux/pfkeyv2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/pfkeyv2.h b/include/uapi/linux/pfkeyv2.h
index d65b11785260..798ba9ffd48c 100644
--- a/include/uapi/linux/pfkeyv2.h
+++ b/include/uapi/linux/pfkeyv2.h
@@ -309,6 +309,7 @@ struct sadb_x_filter {
 #define SADB_X_AALG_SHA2_512HMAC	7
 #define SADB_X_AALG_RIPEMD160HMAC	8
 #define SADB_X_AALG_AES_XCBC_MAC	9
+#define SADB_X_AALG_SM3_256HMAC		10
 #define SADB_X_AALG_NULL		251	/* kame */
 #define SADB_AALG_MAX			251
 
-- 
cgit v1.2.3


From 23b6a6df94c6ce434e7947cfad14b1640fb9f794 Mon Sep 17 00:00:00 2001
From: Xu Jia <xujia39@huawei.com>
Date: Wed, 22 Dec 2021 17:06:59 +0800
Subject: xfrm: Add support for SM4 symmetric cipher algorithm

This patch adds SM4 encryption algorithm entry to ealg_list.

Signed-off-by: Xu Jia <xujia39@huawei.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/uapi/linux/pfkeyv2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/pfkeyv2.h b/include/uapi/linux/pfkeyv2.h
index 798ba9ffd48c..8abae1f6749c 100644
--- a/include/uapi/linux/pfkeyv2.h
+++ b/include/uapi/linux/pfkeyv2.h
@@ -330,6 +330,7 @@ struct sadb_x_filter {
 #define SADB_X_EALG_AES_GCM_ICV16	20
 #define SADB_X_EALG_CAMELLIACBC		22
 #define SADB_X_EALG_NULL_AES_GMAC	23
+#define SADB_X_EALG_SM4CBC		24
 #define SADB_EALG_MAX                   253 /* last EALG */
 /* private allocations should use 249-255 (RFC2407) */
 #define SADB_X_EALG_SERPENTCBC  252     /* draft-ietf-ipsec-ciph-aes-cbc-00 */
-- 
cgit v1.2.3


From 4e484b3e969b52effd95c17f7a86f39208b2ccf4 Mon Sep 17 00:00:00 2001
From: Antony Antony <antony.antony@secunet.com>
Date: Wed, 22 Dec 2021 14:11:18 +0100
Subject: xfrm: rate limit SA mapping change message to user space

Kernel generates mapping change message, XFRM_MSG_MAPPING,
when a source port chage is detected on a input state with UDP
encapsulation set.  Kernel generates a message for each IPsec packet
with new source port.  For a high speed flow per packet mapping change
message can be excessive, and can overload the user space listener.

Introduce rate limiting for XFRM_MSG_MAPPING message to the user space.

The rate limiting is configurable via netlink, when adding a new SA or
updating it. Use the new attribute XFRMA_MTIMER_THRESH in seconds.

v1->v2 change:
	update xfrm_sa_len()

v2->v3 changes:
	use u32 insted unsigned long to reduce size of struct xfrm_state
	fix xfrm_ompat size Reported-by: kernel test robot <lkp@intel.com>
	accept XFRM_MSG_MAPPING only when XFRMA_ENCAP is present

Co-developed-by: Thomas Egerer <thomas.egerer@secunet.com>
Signed-off-by: Thomas Egerer <thomas.egerer@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h        | 5 +++++
 include/uapi/linux/xfrm.h | 1 +
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 2308210793a0..2589e4c0501b 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -200,6 +200,11 @@ struct xfrm_state {
 	struct xfrm_algo_aead	*aead;
 	const char		*geniv;
 
+	/* mapping change rate limiting */
+	__be16 new_mapping_sport;
+	u32 new_mapping;	/* seconds */
+	u32 mapping_maxage;	/* seconds for input SA */
+
 	/* Data for encapsulator */
 	struct xfrm_encap_tmpl	*encap;
 	struct sock __rcu	*encap_sk;
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index eda0426ec4c2..4e29d7851890 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -313,6 +313,7 @@ enum xfrm_attr_type_t {
 	XFRMA_SET_MARK,		/* __u32 */
 	XFRMA_SET_MARK_MASK,	/* __u32 */
 	XFRMA_IF_ID,		/* __u32 */
+	XFRMA_MTIMER_THRESH,	/* __u32 in seconds for input SA */
 	__XFRMA_MAX
 
 #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK	/* Compatibility */
-- 
cgit v1.2.3


From 3d3b2f57d4447e6e9f4096ad01d0e4129f7bc7e9 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 21 Dec 2021 16:40:30 -0500
Subject: sctp: move hlist_node and hashent out of sctp_ep_common

Struct sctp_ep_common is included in both asoc and ep, but hlist_node
and hashent are only needed by ep after asoc_hashtable was dropped by
Commit b5eff7128366 ("sctp: drop the old assoc hashtable of sctp").

So it is better to move hlist_node and hashent from sctp_ep_common to
sctp_endpoint, and it saves some space for each asoc.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h    | 4 ++--
 include/net/sctp/structs.h | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 189fdb9db162..33cf4789009f 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -510,8 +510,8 @@ static inline int sctp_ep_hashfn(struct net *net, __u16 lport)
 	return (net_hash_mix(net) + lport) & (sctp_ep_hashsize - 1);
 }
 
-#define sctp_for_each_hentry(epb, head) \
-	hlist_for_each_entry(epb, head, node)
+#define sctp_for_each_hentry(ep, head) \
+	hlist_for_each_entry(ep, head, node)
 
 /* Is a socket of this style? */
 #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style))
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index e7d1ed7a3dfb..8826b47e4ff7 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1243,10 +1243,6 @@ enum sctp_endpoint_type {
  */
 
 struct sctp_ep_common {
-	/* Fields to help us manage our entries in the hash tables. */
-	struct hlist_node node;
-	int hashent;
-
 	/* Runtime type information.  What kind of endpoint is this? */
 	enum sctp_endpoint_type type;
 
@@ -1298,6 +1294,10 @@ struct sctp_endpoint {
 	/* Common substructure for endpoint and association. */
 	struct sctp_ep_common base;
 
+	/* Fields to help us manage our entries in the hash tables. */
+	struct hlist_node node;
+	int hashent;
+
 	/* Associations: A list of current associations and mappings
 	 *	      to the data consumers for each association. This
 	 *	      may be in the form of a hash table or other
-- 
cgit v1.2.3


From 30be4551f9e26292599e666985119a5b559a2e4a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 22 Dec 2021 18:32:56 +0200
Subject: wwan: Replace kernel.h with the necessary inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/wwan.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/wwan.h b/include/linux/wwan.h
index e143c88bf4b0..afb3334ec8c5 100644
--- a/include/linux/wwan.h
+++ b/include/linux/wwan.h
@@ -4,12 +4,9 @@
 #ifndef __WWAN_H
 #define __WWAN_H
 
-#include <linux/device.h>
-#include <linux/kernel.h>
 #include <linux/poll.h>
-#include <linux/skbuff.h>
-#include <linux/netlink.h>
 #include <linux/netdevice.h>
+#include <linux/types.h>
 
 /**
  * enum wwan_port_type - WWAN port types
@@ -37,6 +34,10 @@ enum wwan_port_type {
 	WWAN_PORT_UNKNOWN,
 };
 
+struct device;
+struct file;
+struct netlink_ext_ack;
+struct sk_buff;
 struct wwan_port;
 
 /** struct wwan_port_ops - The WWAN port operations
-- 
cgit v1.2.3


From 7da1d1ddd1f02e5de7497a0c849256912652fb6c Mon Sep 17 00:00:00 2001
From: Nick Child <nick.child@ibm.com>
Date: Thu, 16 Dec 2021 17:00:35 -0500
Subject: cuda/pmu: Make find_via_cuda/pmu init functions

Make `find_via_cuda` and `find_via_pmu` initialization functions.
Previously, their definitions in `drivers/macintosh/via-cuda.h` include
the `__init` attribute but their alternative definitions in
`arch/powerpc/powermac/sectup./c` and prototypes in `include/linux/
cuda.h` and `include/linux/pmu.h` do not use the `__init` macro. Since,
only initialization functions call `find_via_cuda` and `find_via_pmu`
it is safe to label these functions with `__init`.

Signed-off-by: Nick Child <nick.child@ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20211216220035.605465-21-nick.child@ibm.com
---
 include/linux/cuda.h | 2 +-
 include/linux/pmu.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/cuda.h b/include/linux/cuda.h
index 45bfe9d61271..daf3e6f98444 100644
--- a/include/linux/cuda.h
+++ b/include/linux/cuda.h
@@ -12,7 +12,7 @@
 #include <uapi/linux/cuda.h>
 
 
-extern int find_via_cuda(void);
+extern int __init find_via_cuda(void);
 extern int cuda_request(struct adb_request *req,
 			void (*done)(struct adb_request *), int nbytes, ...);
 extern void cuda_poll(void);
diff --git a/include/linux/pmu.h b/include/linux/pmu.h
index 52453a24a24f..c677442d007c 100644
--- a/include/linux/pmu.h
+++ b/include/linux/pmu.h
@@ -13,7 +13,7 @@
 #include <uapi/linux/pmu.h>
 
 
-extern int find_via_pmu(void);
+extern int __init find_via_pmu(void);
 
 extern int pmu_request(struct adb_request *req,
 		void (*done)(struct adb_request *), int nbytes, ...);
-- 
cgit v1.2.3


From fc179420fde3821c4d191e81b4f7b05c1dab87e2 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Date: Thu, 23 Dec 2021 13:36:17 +0200
Subject: ASoC: SOF: Move the definition of enum snd_sof_fw_state to global
 header

Move the enum snd_sof_fw_state to include/sound/sof.h to be accessible
outside of the core SOF stack.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Reviewed-by: Paul Olaru <paul.olaru@oss.nxp.com>
Link: https://lore.kernel.org/r/20211223113628.18582-10-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sof.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/sound/sof.h b/include/sound/sof.h
index 23b374311d16..b9131c01eefd 100644
--- a/include/sound/sof.h
+++ b/include/sound/sof.h
@@ -17,6 +17,28 @@
 
 struct snd_sof_dsp_ops;
 
+/**
+ * enum snd_sof_fw_state - DSP firmware state definitions
+ * @SOF_FW_BOOT_NOT_STARTED:	firmware boot is not yet started
+ * @SOF_FW_BOOT_PREPARE:	preparing for boot (firmware loading for exaqmple)
+ * @SOF_FW_BOOT_IN_PROGRESS:	firmware boot is in progress
+ * @SOF_FW_BOOT_FAILED:		firmware boot failed
+ * @SOF_FW_BOOT_READY_FAILED:	firmware booted but fw_ready op failed
+ * @SOF_FW_BOOT_READY_OK:	firmware booted and fw_ready op passed
+ * @SOF_FW_BOOT_COMPLETE:	firmware is booted up and functional
+ * @SOF_FW_CRASHED:		firmware crashed after successful boot
+ */
+enum snd_sof_fw_state {
+	SOF_FW_BOOT_NOT_STARTED = 0,
+	SOF_FW_BOOT_PREPARE,
+	SOF_FW_BOOT_IN_PROGRESS,
+	SOF_FW_BOOT_FAILED,
+	SOF_FW_BOOT_READY_FAILED,
+	SOF_FW_BOOT_READY_OK,
+	SOF_FW_BOOT_COMPLETE,
+	SOF_FW_CRASHED,
+};
+
 /*
  * SOF Platform data.
  */
-- 
cgit v1.2.3


From d41607d37c1385da799f9a2ddb10c460e573687e Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Date: Thu, 23 Dec 2021 13:36:18 +0200
Subject: ASoC: SOF: Rename 'enum snd_sof_fw_state' to 'enum sof_fw_state'

Since there is nothing SND about the firmware state, rename the enum
from `snd_sof_fw_state` to simply `sof_fw_state`

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Reviewed-by: Paul Olaru <paul.olaru@oss.nxp.com>
Link: https://lore.kernel.org/r/20211223113628.18582-11-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sof.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/sof.h b/include/sound/sof.h
index b9131c01eefd..813680ab9aad 100644
--- a/include/sound/sof.h
+++ b/include/sound/sof.h
@@ -18,7 +18,7 @@
 struct snd_sof_dsp_ops;
 
 /**
- * enum snd_sof_fw_state - DSP firmware state definitions
+ * enum sof_fw_state - DSP firmware state definitions
  * @SOF_FW_BOOT_NOT_STARTED:	firmware boot is not yet started
  * @SOF_FW_BOOT_PREPARE:	preparing for boot (firmware loading for exaqmple)
  * @SOF_FW_BOOT_IN_PROGRESS:	firmware boot is in progress
@@ -28,7 +28,7 @@ struct snd_sof_dsp_ops;
  * @SOF_FW_BOOT_COMPLETE:	firmware is booted up and functional
  * @SOF_FW_CRASHED:		firmware crashed after successful boot
  */
-enum snd_sof_fw_state {
+enum sof_fw_state {
 	SOF_FW_BOOT_NOT_STARTED = 0,
 	SOF_FW_BOOT_PREPARE,
 	SOF_FW_BOOT_IN_PROGRESS,
-- 
cgit v1.2.3


From 66b354064a35b6379963cba27b5d37a278fc9bd9 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Tue, 23 Nov 2021 11:16:00 +0100
Subject: powercap/drivers/dtpm: Remove unused function definition

The dtpm.h header file is exporting a function which is not
implemented neither needed. Remove it.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20211123101601.2433340-1-daniel.lezcano@linaro.org
---
 include/linux/dtpm.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
index 2890f6370eb9..d37e5d06a357 100644
--- a/include/linux/dtpm.h
+++ b/include/linux/dtpm.h
@@ -70,6 +70,4 @@ void dtpm_unregister(struct dtpm *dtpm);
 
 int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent);
 
-int dtpm_register_cpu(struct dtpm *parent);
-
 #endif
-- 
cgit v1.2.3


From 9f01881beae9e40c87f3edfb81e87e973f85d272 Mon Sep 17 00:00:00 2001
From: Akhil R <akhilrajeev@nvidia.com>
Date: Thu, 16 Dec 2021 15:10:08 +0100
Subject: dt-bindings: gpio: Add Tegra241 support

Add the port definitions for the main and AON GPIO controllers found on
Tegra241 (Grace).

Signed-off-by: Akhil R <akhilrajeev@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
---
 include/dt-bindings/gpio/tegra241-gpio.h | 42 ++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 include/dt-bindings/gpio/tegra241-gpio.h

(limited to 'include')

diff --git a/include/dt-bindings/gpio/tegra241-gpio.h b/include/dt-bindings/gpio/tegra241-gpio.h
new file mode 100644
index 000000000000..80cee3016be6
--- /dev/null
+++ b/include/dt-bindings/gpio/tegra241-gpio.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. */
+
+/*
+ * This header provides constants for the nvidia,tegra241-gpio DT binding.
+ *
+ * The first cell in Tegra's GPIO specifier is the GPIO ID. The macros below
+ * provide names for this.
+ *
+ * The second cell contains standard flag values specified in gpio.h.
+ */
+
+#ifndef _DT_BINDINGS_GPIO_TEGRA241_GPIO_H
+#define _DT_BINDINGS_GPIO_TEGRA241_GPIO_H
+
+#include <dt-bindings/gpio/gpio.h>
+
+/* GPIOs implemented by main GPIO controller */
+#define TEGRA241_MAIN_GPIO_PORT_A 0
+#define TEGRA241_MAIN_GPIO_PORT_B 1
+#define TEGRA241_MAIN_GPIO_PORT_C 2
+#define TEGRA241_MAIN_GPIO_PORT_D 3
+#define TEGRA241_MAIN_GPIO_PORT_E 4
+#define TEGRA241_MAIN_GPIO_PORT_F 5
+#define TEGRA241_MAIN_GPIO_PORT_G 6
+#define TEGRA241_MAIN_GPIO_PORT_H 7
+#define TEGRA241_MAIN_GPIO_PORT_I 8
+#define TEGRA241_MAIN_GPIO_PORT_J 9
+#define TEGRA241_MAIN_GPIO_PORT_K 10
+#define TEGRA241_MAIN_GPIO_PORT_L 11
+
+#define TEGRA241_MAIN_GPIO(port, offset) \
+	((TEGRA241_MAIN_GPIO_PORT_##port * 8) + (offset))
+
+/* GPIOs implemented by AON GPIO controller */
+#define TEGRA241_AON_GPIO_PORT_AA 0
+#define TEGRA241_AON_GPIO_PORT_BB 1
+
+#define TEGRA241_AON_GPIO(port, offset) \
+	((TEGRA241_AON_GPIO_PORT_##port * 8) + (offset))
+
+#endif
-- 
cgit v1.2.3


From dd123e62bdedcd3a486e48e883ec63138ec2c14c Mon Sep 17 00:00:00 2001
From: Henning Schild <henning.schild@siemens.com>
Date: Mon, 13 Dec 2021 13:04:59 +0100
Subject: platform/x86: simatic-ipc: add main driver for Siemens devices

This mainly implements detection of these devices and will allow
secondary drivers to work on such machines.

The identification is DMI-based with a vendor specific way to tell them
apart in a reliable way.

Drivers for LEDs and Watchdogs will follow to make use of that platform
detection.

There is also some code to allow secondary drivers to find GPIO memory,
that needs to be in place because the pinctrl drivers do not come up.

Signed-off-by: Henning Schild <henning.schild@siemens.com>
Link: https://lore.kernel.org/r/20211213120502.20661-2-henning.schild@siemens.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 include/linux/platform_data/x86/simatic-ipc-base.h | 29 +++++++++
 include/linux/platform_data/x86/simatic-ipc.h      | 72 ++++++++++++++++++++++
 2 files changed, 101 insertions(+)
 create mode 100644 include/linux/platform_data/x86/simatic-ipc-base.h
 create mode 100644 include/linux/platform_data/x86/simatic-ipc.h

(limited to 'include')

diff --git a/include/linux/platform_data/x86/simatic-ipc-base.h b/include/linux/platform_data/x86/simatic-ipc-base.h
new file mode 100644
index 000000000000..62d2bc774067
--- /dev/null
+++ b/include/linux/platform_data/x86/simatic-ipc-base.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Siemens SIMATIC IPC drivers
+ *
+ * Copyright (c) Siemens AG, 2018-2021
+ *
+ * Authors:
+ *  Henning Schild <henning.schild@siemens.com>
+ *  Gerd Haeussler <gerd.haeussler.ext@siemens.com>
+ */
+
+#ifndef __PLATFORM_DATA_X86_SIMATIC_IPC_BASE_H
+#define __PLATFORM_DATA_X86_SIMATIC_IPC_BASE_H
+
+#include <linux/types.h>
+
+#define SIMATIC_IPC_DEVICE_NONE 0
+#define SIMATIC_IPC_DEVICE_227D 1
+#define SIMATIC_IPC_DEVICE_427E 2
+#define SIMATIC_IPC_DEVICE_127E 3
+#define SIMATIC_IPC_DEVICE_227E 4
+
+struct simatic_ipc_platform {
+	u8	devmode;
+};
+
+u32 simatic_ipc_get_membase0(unsigned int p2sb);
+
+#endif /* __PLATFORM_DATA_X86_SIMATIC_IPC_BASE_H */
diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h
new file mode 100644
index 000000000000..f3b76b39776b
--- /dev/null
+++ b/include/linux/platform_data/x86/simatic-ipc.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Siemens SIMATIC IPC drivers
+ *
+ * Copyright (c) Siemens AG, 2018-2021
+ *
+ * Authors:
+ *  Henning Schild <henning.schild@siemens.com>
+ *  Gerd Haeussler <gerd.haeussler.ext@siemens.com>
+ */
+
+#ifndef __PLATFORM_DATA_X86_SIMATIC_IPC_H
+#define __PLATFORM_DATA_X86_SIMATIC_IPC_H
+
+#include <linux/dmi.h>
+#include <linux/platform_data/x86/simatic-ipc-base.h>
+
+#define SIMATIC_IPC_DMI_ENTRY_OEM	129
+/* binary type */
+#define SIMATIC_IPC_DMI_TYPE		0xff
+#define SIMATIC_IPC_DMI_GROUP		0x05
+#define SIMATIC_IPC_DMI_ENTRY		0x02
+#define SIMATIC_IPC_DMI_TID		0x02
+
+enum simatic_ipc_station_ids {
+	SIMATIC_IPC_INVALID_STATION_ID = 0,
+	SIMATIC_IPC_IPC227D = 0x00000501,
+	SIMATIC_IPC_IPC427D = 0x00000701,
+	SIMATIC_IPC_IPC227E = 0x00000901,
+	SIMATIC_IPC_IPC277E = 0x00000902,
+	SIMATIC_IPC_IPC427E = 0x00000A01,
+	SIMATIC_IPC_IPC477E = 0x00000A02,
+	SIMATIC_IPC_IPC127E = 0x00000D01,
+};
+
+static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len)
+{
+	struct {
+		u8	type;		/* type (0xff = binary) */
+		u8	len;		/* len of data entry */
+		u8	group;
+		u8	entry;
+		u8	tid;
+		__le32	station_id;	/* station id (LE) */
+	} __packed * data_entry = (void *)data + sizeof(struct dmi_header);
+
+	while ((u8 *)data_entry < data + max_len) {
+		if (data_entry->type == SIMATIC_IPC_DMI_TYPE &&
+		    data_entry->len == sizeof(*data_entry) &&
+		    data_entry->group == SIMATIC_IPC_DMI_GROUP &&
+		    data_entry->entry == SIMATIC_IPC_DMI_ENTRY &&
+		    data_entry->tid == SIMATIC_IPC_DMI_TID) {
+			return le32_to_cpu(data_entry->station_id);
+		}
+		data_entry = (void *)((u8 *)(data_entry) + data_entry->len);
+	}
+
+	return SIMATIC_IPC_INVALID_STATION_ID;
+}
+
+static inline void
+simatic_ipc_find_dmi_entry_helper(const struct dmi_header *dh, void *_data)
+{
+	u32 *id = _data;
+
+	if (dh->type != SIMATIC_IPC_DMI_ENTRY_OEM)
+		return;
+
+	*id = simatic_ipc_get_station_id((u8 *)dh, dh->length);
+}
+
+#endif /* __PLATFORM_DATA_X86_SIMATIC_IPC_H */
-- 
cgit v1.2.3


From 3d4641a42ccf1593b3f3a474ee7541727acbb8e0 Mon Sep 17 00:00:00 2001
From: Stephan Gerhold <stephan@gerhold.net>
Date: Tue, 14 Dec 2021 15:20:46 +0100
Subject: ASoC: core: Add snd_soc_of_parse_pin_switches() from
 simple-card-utils

The ASoC core already has several helpers to parse card properties
from the device tree. Move the parsing code for "pin-switches" from
simple-card-utils to a shared snd_soc_of_parse_pin_switches() function
so other drivers can also use it to set up pin switches configured in
the device tree.

Cc: Paul Cercueil <paul@crapouillou.net>
Signed-off-by: Stephan Gerhold <stephan@gerhold.net>
Link: https://lore.kernel.org/r/20211214142049.20422-2-stephan@gerhold.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 5872a8864f3b..7a1650b303f1 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1211,6 +1211,7 @@ int snd_soc_of_parse_card_name(struct snd_soc_card *card,
 			       const char *propname);
 int snd_soc_of_parse_audio_simple_widgets(struct snd_soc_card *card,
 					  const char *propname);
+int snd_soc_of_parse_pin_switches(struct snd_soc_card *card, const char *prop);
 int snd_soc_of_get_slot_mask(struct device_node *np,
 			     const char *prop_name,
 			     unsigned int *mask);
-- 
cgit v1.2.3


From b86947b52f0d0e5b6e6f0510933ca13aad266e47 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 24 Dec 2021 10:10:29 +0800
Subject: ASoC/soundwire: intel: simplify callbacks for params/hw_free

We don't really need to pass a substream to the callback, we only need
the direction. No functionality change, only simplification to enable
improve suspend with paused streams.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Acked-By: Vinod Koul <vkoul@kernel.org>
Link: https://lore.kernel.org/r/20211224021034.26635-3-yung-chuan.liao@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/soundwire/sdw_intel.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index 8a463b8fc12a..67e0d3e750b5 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -92,7 +92,7 @@
  * firmware.
  */
 struct sdw_intel_stream_params_data {
-	struct snd_pcm_substream *substream;
+	int stream;
 	struct snd_soc_dai *dai;
 	struct snd_pcm_hw_params *hw_params;
 	int link_id;
@@ -105,7 +105,7 @@ struct sdw_intel_stream_params_data {
  * firmware.
  */
 struct sdw_intel_stream_free_data {
-	struct snd_pcm_substream *substream;
+	int stream;
 	struct snd_soc_dai *dai;
 	int link_id;
 };
-- 
cgit v1.2.3


From e8444560b4d9302a511f0996f4cfdf85b628f4ca Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 24 Dec 2021 10:10:31 +0800
Subject: ASoC/SoundWire: dai: expand 'stream' concept beyond SoundWire

The HDAudio ASoC support relies on the set_tdm_slots() helper to store
the HDaudio stream tag in the tx_mask. This only works because of the
pre-existing order in soc-pcm.c, where the hw_params() is handled for
codec_dais *before* cpu_dais. When the order is reversed, the
stream_tag is used as a mask in the codec fixup functions:

	/* fixup params based on TDM slot masks */
	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
	    codec_dai->tx_mask)
		soc_pcm_codec_params_fixup(&codec_params,
					   codec_dai->tx_mask);

As a result of this confusion, the codec_params_fixup() ends-up
generating bad channel masks, depending on what stream_tag was
allocated.

We could add a flag to state that the tx_mask is really not a mask,
but it would be quite ugly to persist in overloading concepts.

Instead, this patch suggests a more generic get/set 'stream' API based
on the existing model for SoundWire. We can expand the concept to
store 'stream' opaque information that is specific to different DAI
types. In the case of HDAudio DAIs, we only need to store a stream tag
as an unsigned char pointer. The TDM rx_ and tx_masks should really
only be used to store masks.

Rename get_sdw_stream/set_sdw_stream callbacks and helpers as
get_stream/set_stream. No functionality change beyond the rename.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Rander Wang <rander.wang@intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Acked-By: Vinod Koul <vkoul@kernel.org>
Link: https://lore.kernel.org/r/20211224021034.26635-5-yung-chuan.liao@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 5d4dd7c5450b..bbd821d2df9c 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -295,9 +295,9 @@ struct snd_soc_dai_ops {
 			unsigned int *rx_num, unsigned int *rx_slot);
 	int (*set_tristate)(struct snd_soc_dai *dai, int tristate);
 
-	int (*set_sdw_stream)(struct snd_soc_dai *dai,
-			void *stream, int direction);
-	void *(*get_sdw_stream)(struct snd_soc_dai *dai, int direction);
+	int (*set_stream)(struct snd_soc_dai *dai,
+			  void *stream, int direction);
+	void *(*get_stream)(struct snd_soc_dai *dai, int direction);
 
 	/*
 	 * DAI digital mute - optional.
@@ -515,42 +515,42 @@ static inline void *snd_soc_dai_get_drvdata(struct snd_soc_dai *dai)
 }
 
 /**
- * snd_soc_dai_set_sdw_stream() - Configures a DAI for SDW stream operation
+ * snd_soc_dai_set_stream() - Configures a DAI for stream operation
  * @dai: DAI
- * @stream: STREAM
+ * @stream: STREAM (opaque structure depending on DAI type)
  * @direction: Stream direction(Playback/Capture)
- * SoundWire subsystem doesn't have a notion of direction and we reuse
+ * Some subsystems, such as SoundWire, don't have a notion of direction and we reuse
  * the ASoC stream direction to configure sink/source ports.
  * Playback maps to source ports and Capture for sink ports.
  *
  * This should be invoked with NULL to clear the stream set previously.
  * Returns 0 on success, a negative error code otherwise.
  */
-static inline int snd_soc_dai_set_sdw_stream(struct snd_soc_dai *dai,
-				void *stream, int direction)
+static inline int snd_soc_dai_set_stream(struct snd_soc_dai *dai,
+					 void *stream, int direction)
 {
-	if (dai->driver->ops->set_sdw_stream)
-		return dai->driver->ops->set_sdw_stream(dai, stream, direction);
+	if (dai->driver->ops->set_stream)
+		return dai->driver->ops->set_stream(dai, stream, direction);
 	else
 		return -ENOTSUPP;
 }
 
 /**
- * snd_soc_dai_get_sdw_stream() - Retrieves SDW stream from DAI
+ * snd_soc_dai_get_stream() - Retrieves stream from DAI
  * @dai: DAI
  * @direction: Stream direction(Playback/Capture)
  *
  * This routine only retrieves that was previously configured
- * with snd_soc_dai_get_sdw_stream()
+ * with snd_soc_dai_get_stream()
  *
  * Returns pointer to stream or an ERR_PTR value, e.g.
  * ERR_PTR(-ENOTSUPP) if callback is not supported;
  */
-static inline void *snd_soc_dai_get_sdw_stream(struct snd_soc_dai *dai,
-					       int direction)
+static inline void *snd_soc_dai_get_stream(struct snd_soc_dai *dai,
+					   int direction)
 {
-	if (dai->driver->ops->get_sdw_stream)
-		return dai->driver->ops->get_sdw_stream(dai, direction);
+	if (dai->driver->ops->get_stream)
+		return dai->driver->ops->get_stream(dai, direction);
 	else
 		return ERR_PTR(-ENOTSUPP);
 }
-- 
cgit v1.2.3


From 54bf7fa3efd08eea03e4bac04e188ee3db6173a7 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 13 Dec 2021 17:11:45 +0100
Subject: ima: Fix undefined arch_ima_get_secureboot() and co

Currently arch_ima_get_secureboot() and arch_get_ima_policy() are
defined only when CONFIG_IMA is set, and this makes any code calling
those functions without CONFIG_IMA fail.

Move the declaration and the dummy definition of those functions
outside ifdef-CONFIG_IMA block for fixing the undefined symbols.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
[zohar@linux.ibm.com: removed in-tree/out-of-tree comment in patch description]
Reviewed-by: Petr Vorel <pvorel@suse.cz>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 include/linux/ima.h | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/ima.h b/include/linux/ima.h
index b6ab66a546ae..426b1744215e 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -50,21 +50,6 @@ static inline void ima_appraise_parse_cmdline(void) {}
 extern void ima_add_kexec_buffer(struct kimage *image);
 #endif
 
-#ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT
-extern bool arch_ima_get_secureboot(void);
-extern const char * const *arch_get_ima_policy(void);
-#else
-static inline bool arch_ima_get_secureboot(void)
-{
-	return false;
-}
-
-static inline const char * const *arch_get_ima_policy(void)
-{
-	return NULL;
-}
-#endif
-
 #else
 static inline enum hash_algo ima_get_current_hash_algo(void)
 {
@@ -155,6 +140,21 @@ static inline int ima_measure_critical_data(const char *event_label,
 
 #endif /* CONFIG_IMA */
 
+#ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT
+extern bool arch_ima_get_secureboot(void);
+extern const char * const *arch_get_ima_policy(void);
+#else
+static inline bool arch_ima_get_secureboot(void)
+{
+	return false;
+}
+
+static inline const char * const *arch_get_ima_policy(void)
+{
+	return NULL;
+}
+#endif
+
 #ifndef CONFIG_IMA_KEXEC
 struct kimage;
 
-- 
cgit v1.2.3


From 12054f0ce8be7d2003ec068ab27c9eb608397b98 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Thu, 16 Dec 2021 17:11:27 -0600
Subject: ALSA/ASoC: hda: move/rename snd_hdac_ext_stop_streams to
 hdac_stream.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

snd_hdac_ext_stop_streams() has really nothing to do with the
extension, it just loops over the bus streams.

Move it to the hdac_stream layer and rename to remove the 'ext'
prefix and add the precision that the chip will also be stopped.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Kai Vehmanen <kai.vehmanen@intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Cezary Rojewski <cezary.rojewski@intel.com>
Link: https://lore.kernel.org/r/20211216231128.344321-2-pierre-louis.bossart@linux.intel.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hdaudio.h     | 1 +
 include/sound/hdaudio_ext.h | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index 22af68b01426..6a90ce405e60 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -558,6 +558,7 @@ int snd_hdac_stream_set_params(struct hdac_stream *azx_dev,
 void snd_hdac_stream_start(struct hdac_stream *azx_dev, bool fresh_start);
 void snd_hdac_stream_clear(struct hdac_stream *azx_dev);
 void snd_hdac_stream_stop(struct hdac_stream *azx_dev);
+void snd_hdac_stop_streams_and_chip(struct hdac_bus *bus);
 void snd_hdac_stream_reset(struct hdac_stream *azx_dev);
 void snd_hdac_stream_sync_trigger(struct hdac_stream *azx_dev, bool set,
 				  unsigned int streams, unsigned int reg);
diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h
index d4e31ea16aba..56ea5cde5e63 100644
--- a/include/sound/hdaudio_ext.h
+++ b/include/sound/hdaudio_ext.h
@@ -92,7 +92,6 @@ void snd_hdac_ext_stream_decouple_locked(struct hdac_bus *bus,
 				  struct hdac_ext_stream *azx_dev, bool decouple);
 void snd_hdac_ext_stream_decouple(struct hdac_bus *bus,
 				struct hdac_ext_stream *azx_dev, bool decouple);
-void snd_hdac_ext_stop_streams(struct hdac_bus *bus);
 
 int snd_hdac_ext_stream_set_spib(struct hdac_bus *bus,
 				 struct hdac_ext_stream *stream, u32 value);
-- 
cgit v1.2.3


From 0f7e5ee62f4c24ca9db58351c86653cc3ee0bd0e Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Thu, 16 Dec 2021 17:11:28 -0600
Subject: ALSA: HDA: hdac_ext_stream: use consistent prefixes for variables

The existing code maximizes confusion by using 'stream' and 'hstream'
variables of different types. Examples:

struct hdac_stream *stream;
struct hdac_ext_stream *stream;
struct hdac_stream *hstream;
struct hdac_ext_stream *hstream;

with some additional copy/paste remains:
struct hdac_ext_stream *azx_dev;

This patch suggests a consistent naming across all 'hdac_ext_stream'
functions. The convention is:

struct hdac_stream *hstream;
struct hdac_ext_stream *hext_stream;

No functionality change - just renaming of variables and more
consistent indentation.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Reviewed-by: Rander Wang <rander.wang@intel.com>
Link: https://lore.kernel.org/r/20211216231128.344321-3-pierre-louis.bossart@linux.intel.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hdaudio_ext.h | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h
index 56ea5cde5e63..77123c3e4095 100644
--- a/include/sound/hdaudio_ext.h
+++ b/include/sound/hdaudio_ext.h
@@ -78,35 +78,35 @@ struct hdac_ext_stream {
 	container_of(s, struct hdac_ext_stream, hstream)
 
 void snd_hdac_ext_stream_init(struct hdac_bus *bus,
-				struct hdac_ext_stream *stream, int idx,
-				int direction, int tag);
+			      struct hdac_ext_stream *hext_stream, int idx,
+			      int direction, int tag);
 int snd_hdac_ext_stream_init_all(struct hdac_bus *bus, int start_idx,
-		int num_stream, int dir);
+				 int num_stream, int dir);
 void snd_hdac_stream_free_all(struct hdac_bus *bus);
 void snd_hdac_link_free_all(struct hdac_bus *bus);
 struct hdac_ext_stream *snd_hdac_ext_stream_assign(struct hdac_bus *bus,
 					   struct snd_pcm_substream *substream,
 					   int type);
-void snd_hdac_ext_stream_release(struct hdac_ext_stream *azx_dev, int type);
+void snd_hdac_ext_stream_release(struct hdac_ext_stream *hext_stream, int type);
 void snd_hdac_ext_stream_decouple_locked(struct hdac_bus *bus,
-				  struct hdac_ext_stream *azx_dev, bool decouple);
+					 struct hdac_ext_stream *hext_stream, bool decouple);
 void snd_hdac_ext_stream_decouple(struct hdac_bus *bus,
 				struct hdac_ext_stream *azx_dev, bool decouple);
 
 int snd_hdac_ext_stream_set_spib(struct hdac_bus *bus,
-				 struct hdac_ext_stream *stream, u32 value);
+				 struct hdac_ext_stream *hext_stream, u32 value);
 int snd_hdac_ext_stream_get_spbmaxfifo(struct hdac_bus *bus,
-				 struct hdac_ext_stream *stream);
+				       struct hdac_ext_stream *hext_stream);
 void snd_hdac_ext_stream_drsm_enable(struct hdac_bus *bus,
 				bool enable, int index);
 int snd_hdac_ext_stream_set_dpibr(struct hdac_bus *bus,
-				struct hdac_ext_stream *stream, u32 value);
-int snd_hdac_ext_stream_set_lpib(struct hdac_ext_stream *stream, u32 value);
+				struct hdac_ext_stream *hext_stream, u32 value);
+int snd_hdac_ext_stream_set_lpib(struct hdac_ext_stream *hext_stream, u32 value);
 
-void snd_hdac_ext_link_stream_start(struct hdac_ext_stream *hstream);
-void snd_hdac_ext_link_stream_clear(struct hdac_ext_stream *hstream);
-void snd_hdac_ext_link_stream_reset(struct hdac_ext_stream *hstream);
-int snd_hdac_ext_link_stream_setup(struct hdac_ext_stream *stream, int fmt);
+void snd_hdac_ext_link_stream_start(struct hdac_ext_stream *hext_stream);
+void snd_hdac_ext_link_stream_clear(struct hdac_ext_stream *hext_stream);
+void snd_hdac_ext_link_stream_reset(struct hdac_ext_stream *hext_stream);
+int snd_hdac_ext_link_stream_setup(struct hdac_ext_stream *hext_stream, int fmt);
 
 struct hdac_ext_link {
 	struct hdac_bus *bus;
-- 
cgit v1.2.3


From 5ec7d18d1813a5bead0b495045606c93873aecbb Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 23 Dec 2021 13:04:30 -0500
Subject: sctp: use call_rcu to free endpoint

This patch is to delay the endpoint free by calling call_rcu() to fix
another use-after-free issue in sctp_sock_dump():

  BUG: KASAN: use-after-free in __lock_acquire+0x36d9/0x4c20
  Call Trace:
    __lock_acquire+0x36d9/0x4c20 kernel/locking/lockdep.c:3218
    lock_acquire+0x1ed/0x520 kernel/locking/lockdep.c:3844
    __raw_spin_lock_bh include/linux/spinlock_api_smp.h:135 [inline]
    _raw_spin_lock_bh+0x31/0x40 kernel/locking/spinlock.c:168
    spin_lock_bh include/linux/spinlock.h:334 [inline]
    __lock_sock+0x203/0x350 net/core/sock.c:2253
    lock_sock_nested+0xfe/0x120 net/core/sock.c:2774
    lock_sock include/net/sock.h:1492 [inline]
    sctp_sock_dump+0x122/0xb20 net/sctp/diag.c:324
    sctp_for_each_transport+0x2b5/0x370 net/sctp/socket.c:5091
    sctp_diag_dump+0x3ac/0x660 net/sctp/diag.c:527
    __inet_diag_dump+0xa8/0x140 net/ipv4/inet_diag.c:1049
    inet_diag_dump+0x9b/0x110 net/ipv4/inet_diag.c:1065
    netlink_dump+0x606/0x1080 net/netlink/af_netlink.c:2244
    __netlink_dump_start+0x59a/0x7c0 net/netlink/af_netlink.c:2352
    netlink_dump_start include/linux/netlink.h:216 [inline]
    inet_diag_handler_cmd+0x2ce/0x3f0 net/ipv4/inet_diag.c:1170
    __sock_diag_cmd net/core/sock_diag.c:232 [inline]
    sock_diag_rcv_msg+0x31d/0x410 net/core/sock_diag.c:263
    netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2477
    sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:274

This issue occurs when asoc is peeled off and the old sk is freed after
getting it by asoc->base.sk and before calling lock_sock(sk).

To prevent the sk free, as a holder of the sk, ep should be alive when
calling lock_sock(). This patch uses call_rcu() and moves sock_put and
ep free into sctp_endpoint_destroy_rcu(), so that it's safe to try to
hold the ep under rcu_read_lock in sctp_transport_traverse_process().

If sctp_endpoint_hold() returns true, it means this ep is still alive
and we have held it and can continue to dump it; If it returns false,
it means this ep is dead and can be freed after rcu_read_unlock, and
we should skip it.

In sctp_sock_dump(), after locking the sk, if this ep is different from
tsp->asoc->ep, it means during this dumping, this asoc was peeled off
before calling lock_sock(), and the sk should be skipped; If this ep is
the same with tsp->asoc->ep, it means no peeloff happens on this asoc,
and due to lock_sock, no peeloff will happen either until release_sock.

Note that delaying endpoint free won't delay the port release, as the
port release happens in sctp_endpoint_destroy() before calling call_rcu().
Also, freeing endpoint by call_rcu() makes it safe to access the sk by
asoc->base.sk in sctp_assocs_seq_show() and sctp_rcv().

Thanks Jones to bring this issue up.

v1->v2:
  - improve the changelog.
  - add kfree(ep) into sctp_endpoint_destroy_rcu(), as Jakub noticed.

Reported-by: syzbot+9276d76e83e3bcde6c99@syzkaller.appspotmail.com
Reported-by: Lee Jones <lee.jones@linaro.org>
Fixes: d25adbeb0cdb ("sctp: fix an use-after-free issue in sctp_sock_dump")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h    | 6 +++---
 include/net/sctp/structs.h | 3 ++-
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 189fdb9db162..d314a180ab93 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -105,6 +105,7 @@ extern struct percpu_counter sctp_sockets_allocated;
 int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
 struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
 
+typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *);
 void sctp_transport_walk_start(struct rhashtable_iter *iter);
 void sctp_transport_walk_stop(struct rhashtable_iter *iter);
 struct sctp_transport *sctp_transport_get_next(struct net *net,
@@ -115,9 +116,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
 				  struct net *net,
 				  const union sctp_addr *laddr,
 				  const union sctp_addr *paddr, void *p);
-int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
-			    int (*cb_done)(struct sctp_transport *, void *),
-			    struct net *net, int *pos, void *p);
+int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done,
+				    struct net *net, int *pos, void *p);
 int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p);
 int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
 		       struct sctp_info *info);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 899c29c326ba..8dabd8800006 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1355,6 +1355,7 @@ struct sctp_endpoint {
 	      reconf_enable:1;
 
 	__u8  strreset_enable;
+	struct rcu_head rcu;
 };
 
 /* Recover the outter endpoint structure. */
@@ -1370,7 +1371,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base)
 struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t);
 void sctp_endpoint_free(struct sctp_endpoint *);
 void sctp_endpoint_put(struct sctp_endpoint *);
-void sctp_endpoint_hold(struct sctp_endpoint *);
+int sctp_endpoint_hold(struct sctp_endpoint *ep);
 void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *);
 struct sctp_association *sctp_endpoint_lookup_assoc(
 	const struct sctp_endpoint *ep,
-- 
cgit v1.2.3


From 94ab10dd42a70acc5208a41325617e3d9cf81a70 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 24 Dec 2021 21:12:48 -0800
Subject: mm: delete unsafe BUG from page_cache_add_speculative()

It is not easily reproducible, but on 5.16-rc I have several times hit
the VM_BUG_ON_PAGE(PageTail(page), page) in
page_cache_add_speculative(): usually from filemap_get_read_batch() for
an ext4 read, yesterday from next_uptodate_page() from
filemap_map_pages() for a shmem fault.

That BUG used to be placed where page_ref_add_unless() had succeeded,
but now it is placed before folio_ref_add_unless() is attempted: that is
not safe, since it is only the acquired reference which makes the page
safe from racing THP collapse or split.

We could keep the BUG, checking PageTail only when
folio_ref_try_add_rcu() has succeeded; but I don't think it adds much
value - just delete it.

Link: https://lkml.kernel.org/r/8b98fc6f-3439-8614-c3f3-945c659a1aba@google.com
Fixes: 020853b6f5ea ("mm: Add folio_try_get_rcu()")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: William Kucharski <william.kucharski@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 605246452305..d150a9082b31 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -285,7 +285,6 @@ static inline struct inode *folio_inode(struct folio *folio)
 
 static inline bool page_cache_add_speculative(struct page *page, int count)
 {
-	VM_BUG_ON_PAGE(PageTail(page), page);
 	return folio_ref_try_add_rcu((struct folio *)page, count);
 }
 
-- 
cgit v1.2.3


From 595ec1973c276f6c0c1de8aca5eef8dfd81f9b49 Mon Sep 17 00:00:00 2001
From: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Date: Fri, 24 Dec 2021 21:12:51 -0800
Subject: mm/page_alloc: fix __alloc_size attribute for alloc_pages_exact_nid

The second parameter of alloc_pages_exact_nid is the one indicating the
size of memory pointed by the returned pointer.

Link: https://lkml.kernel.org/r/YbjEgwhn4bGblp//@coeus
Fixes: abd58f38dfb4 ("mm/page_alloc: add __alloc_size attributes for better bounds checking")
Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: Levente Polyak <levente@leventepolyak.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index b976c4177299..8fcc38467af6 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -624,7 +624,7 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask);
 
 void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __alloc_size(1);
 void free_pages_exact(void *virt, size_t size);
-__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(1);
+__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(2);
 
 #define __get_free_page(gfp_mask) \
 		__get_free_pages((gfp_mask), 0)
-- 
cgit v1.2.3


From 48f31169830f589e4c7ac475ccc7414951ded3f0 Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Thu, 14 Oct 2021 22:38:41 +0300
Subject: habanalabs: change wait for interrupt timeout to 64 bit

In order to increase maximum wait-for-interrupt timeout, change it
to 64 bit variable. This wait is used only by newer ASICs, so no
problem in changing this interface at this time.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 include/uapi/misc/habanalabs.h | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 00b309590499..c5760acebdd1 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -911,14 +911,18 @@ struct hl_wait_cs_in {
 	 */
 	__u32 flags;
 
-	/* Multi CS API info- valid entries in multi-CS array */
-	__u8 seq_arr_len;
-	__u8 pad[3];
+	union {
+		struct {
+			/* Multi CS API info- valid entries in multi-CS array */
+			__u8 seq_arr_len;
+			__u8 pad[7];
+		};
 
-	/* Absolute timeout to wait for an interrupt in microseconds.
-	 * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set
-	 */
-	__u32 interrupt_timeout_us;
+		/* Absolute timeout to wait for an interrupt in microseconds.
+		 * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set
+		 */
+		__u64 interrupt_timeout_us;
+	};
 };
 
 #define HL_WAIT_CS_STATUS_COMPLETED	0
-- 
cgit v1.2.3


From 1679c7ee580fdaa2a5df398a526b2eddc857f2a1 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Mon, 25 Oct 2021 09:47:04 +0300
Subject: habanalabs: expand clock throttling information uAPI

In addition to the clock throttling reason, user should be able
to obtain also the start time and the duration of the throttling
event.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 include/uapi/misc/habanalabs.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index c5760acebdd1..257b9630773e 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -473,15 +473,27 @@ struct hl_info_pci_counters {
 	__u64 replay_cnt;
 };
 
-#define HL_CLK_THROTTLE_POWER	0x1
-#define HL_CLK_THROTTLE_THERMAL	0x2
+enum hl_clk_throttling_type {
+	HL_CLK_THROTTLE_TYPE_POWER,
+	HL_CLK_THROTTLE_TYPE_THERMAL,
+	HL_CLK_THROTTLE_TYPE_MAX
+};
+
+/* clk_throttling_reason masks */
+#define HL_CLK_THROTTLE_POWER		(1 << HL_CLK_THROTTLE_TYPE_POWER)
+#define HL_CLK_THROTTLE_THERMAL		(1 << HL_CLK_THROTTLE_TYPE_THERMAL)
 
 /**
  * struct hl_info_clk_throttle - clock throttling reason
  * @clk_throttling_reason: each bit represents a clk throttling reason
+ * @clk_throttling_timestamp_us: represents CPU timestamp in microseconds of the start-event
+ * @clk_throttling_duration_ns: the clock throttle time in nanosec
  */
 struct hl_info_clk_throttle {
 	__u32 clk_throttling_reason;
+	__u32 pad;
+	__u64 clk_throttling_timestamp_us[HL_CLK_THROTTLE_TYPE_MAX];
+	__u64 clk_throttling_duration_ns[HL_CLK_THROTTLE_TYPE_MAX];
 };
 
 /**
-- 
cgit v1.2.3


From 49c052dad691ba1a3dc3559b74e99f2ec2fa0319 Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Sun, 24 Oct 2021 19:02:32 +0300
Subject: habanalabs: add new opcodes for INFO IOCTL

Add implementation for new opcodes in the INFO IOCTL:
1. Retrieve the replaced DRAM rows from f/w.
2. Retrieve the pending DRAM rows from f/w.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 include/uapi/misc/habanalabs.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 257b9630773e..9b4d72897061 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -334,6 +334,8 @@ enum hl_server_type {
  * HL_INFO_TOTAL_ENERGY  - Retrieve total energy consumption
  * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency
  * HL_INFO_OPEN_STATS    - Retrieve info regarding recent device open calls
+ * HL_INFO_DRAM_REPLACED_ROWS - Retrieve DRAM replaced rows info
+ * HL_INFO_DRAM_PENDING_ROWS - Retrieve DRAM pending rows num
  */
 #define HL_INFO_HW_IP_INFO		0
 #define HL_INFO_HW_EVENTS		1
@@ -353,6 +355,8 @@ enum hl_server_type {
 #define HL_INFO_PLL_FREQUENCY		16
 #define HL_INFO_POWER			17
 #define HL_INFO_OPEN_STATS		18
+#define HL_INFO_DRAM_REPLACED_ROWS	21
+#define HL_INFO_DRAM_PENDING_ROWS	22
 
 #define HL_INFO_VERSION_MAX_LEN	128
 #define HL_INFO_CARD_NAME_MAX_LEN	16
-- 
cgit v1.2.3


From 3e55b5dbf929a40966b8eb7d4de94fad3bb404bd Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Wed, 3 Nov 2021 10:09:59 +0200
Subject: habanalabs: add support for fetching historic errors

A new uAPI is added for debug purposes of the user-space to retrieve
errors related data from previous session (before device reset was
performed).

Inforamtion is filled when a razwi or CS timeout happens and can
contain one of the following:

1. Retrieve timestamp of last time the device was opened and razwi or
   CS timeout happened.
2. Retrieve information about last CS timeout.
3. Retrieve information about last razwi error.

This information doesn't contain user data, so no danger of data
leakage between users.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 include/uapi/misc/habanalabs.h | 58 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 9b4d72897061..eb8565fdae70 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -336,6 +336,14 @@ enum hl_server_type {
  * HL_INFO_OPEN_STATS    - Retrieve info regarding recent device open calls
  * HL_INFO_DRAM_REPLACED_ROWS - Retrieve DRAM replaced rows info
  * HL_INFO_DRAM_PENDING_ROWS - Retrieve DRAM pending rows num
+ * HL_INFO_LAST_ERR_OPEN_DEV_TIME - Retrieve timestamp of the last time the device was opened
+ *                                  and CS timeout or razwi error occurred.
+ * HL_INFO_CS_TIMEOUT_EVENT - Retrieve CS timeout timestamp and its related CS sequence number.
+ * HL_INFO_RAZWI_EVENT - Retrieve parameters of razwi:
+ *                            Timestamp of razwi.
+ *                            The address which accessing it caused the razwi.
+ *                            Razwi initiator.
+ *                            Razwi cause, was it a page fault or MMU access error.
  */
 #define HL_INFO_HW_IP_INFO		0
 #define HL_INFO_HW_EVENTS		1
@@ -357,8 +365,11 @@ enum hl_server_type {
 #define HL_INFO_OPEN_STATS		18
 #define HL_INFO_DRAM_REPLACED_ROWS	21
 #define HL_INFO_DRAM_PENDING_ROWS	22
+#define HL_INFO_LAST_ERR_OPEN_DEV_TIME	23
+#define HL_INFO_CS_TIMEOUT_EVENT	24
+#define HL_INFO_RAZWI_EVENT		25
 
-#define HL_INFO_VERSION_MAX_LEN	128
+#define HL_INFO_VERSION_MAX_LEN		128
 #define HL_INFO_CARD_NAME_MAX_LEN	16
 
 /**
@@ -575,6 +586,51 @@ struct hl_info_cs_counters {
 	__u64 ctx_validation_drop_cnt;
 };
 
+/**
+ * struct hl_info_last_err_open_dev_time - last error boot information.
+ * @timestamp: timestamp of last time the device was opened and error occurred.
+ */
+struct hl_info_last_err_open_dev_time {
+	__s64 timestamp;
+};
+
+/**
+ * struct hl_info_cs_timeout_event - last CS timeout information.
+ * @timestamp: timestamp when last CS timeout event occurred.
+ * @seq: sequence number of last CS timeout event.
+ */
+struct hl_info_cs_timeout_event {
+	__s64 timestamp;
+	__u64 seq;
+};
+
+#define HL_RAZWI_PAGE_FAULT 0
+#define HL_RAZWI_MMU_ACCESS_ERROR 1
+
+/**
+ * struct hl_info_razwi_event - razwi information.
+ * @timestamp: timestamp of razwi.
+ * @addr: address which accessing it caused razwi.
+ * @engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does not
+ *               have engine id it will be set to U16_MAX.
+ * @engine_id_2: second engine id of razwi initiator. Might happen that razwi have 2 possible
+ *               engines which one them caused the razwi. In that case, it will contain the
+ *               second possible engine id, otherwise it will be set to U16_MAX.
+ * @no_engine_id: if razwi initiator does not have engine id, this field will be set to 1,
+ *                otherwise 0.
+ * @error_type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX.
+ * @pad: padding to 64 bit.
+ */
+struct hl_info_razwi_event {
+	__s64 timestamp;
+	__u64 addr;
+	__u16 engine_id_1;
+	__u16 engine_id_2;
+	__u8 no_engine_id;
+	__u8 error_type;
+	__u8 pad[2];
+};
+
 enum gaudi_dcores {
 	HL_GAUDI_WS_DCORE,
 	HL_GAUDI_WN_DCORE,
-- 
cgit v1.2.3


From 75a5c44d143bc1818e8004a8bee6993aba3a75cf Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Thu, 18 Nov 2021 10:44:05 +0200
Subject: habanalabs: add power information type to POWER_GET packet

In new f/w versions, it is required to explicitly indicate the power
information type when querying the F/W for power info.
When getting the current power level it should be set to power_input.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 include/uapi/misc/habanalabs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index eb8565fdae70..cd86937c572d 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -333,6 +333,7 @@ enum hl_server_type {
  * HL_INFO_SYNC_MANAGER  - Retrieve sync manager info per dcore
  * HL_INFO_TOTAL_ENERGY  - Retrieve total energy consumption
  * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency
+ * HL_INFO_POWER         - Retrieve power information
  * HL_INFO_OPEN_STATS    - Retrieve info regarding recent device open calls
  * HL_INFO_DRAM_REPLACED_ROWS - Retrieve DRAM replaced rows info
  * HL_INFO_DRAM_PENDING_ROWS - Retrieve DRAM pending rows num
-- 
cgit v1.2.3


From 1880f7acd7e0edacbd46385036253801ddc4273f Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Tue, 9 Nov 2021 11:33:28 +0200
Subject: habanalabs: add SOB information to signal submission uAPI

For debug purpose, add SOB address and SOB initial counter value
before current submission to uAPI output.

Using SOB address and initial counter, user can calculate how much of
the submmision has been completed.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 include/uapi/misc/habanalabs.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index cd86937c572d..648850b954a3 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -929,9 +929,17 @@ struct hl_cs_out {
 
 	/*
 	 * SOB base address offset
-	 * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set
+	 * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY or HL_CS_FLAGS_SIGNAL is set
 	 */
 	__u32 sob_base_addr_offset;
+
+	/*
+	 * Count of completed signals in SOB before current signal submission.
+	 * Valid only when (HL_CS_FLAGS_ENCAP_SIGNALS & HL_CS_FLAGS_STAGED_SUBMISSION)
+	 * or HL_CS_FLAGS_SIGNAL is set
+	 */
+	__u16 sob_count_before_submission;
+	__u16 pad[3];
 };
 
 union hl_cs_args {
-- 
cgit v1.2.3


From b9d31cada7d9f137028c11534fff77fec8511690 Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Tue, 2 Nov 2021 11:34:18 +0200
Subject: habanalabs: change wait_for_interrupt implementation

Currently the cq counters are allocated in userspace memory,
and mapped by the driver to the device address space.

A new requirement that is part of new future API related to this one,
requires that cq counters will be allocated in kernel memory.

We leverage the existing cb_create API with KERNEL_MAPPED flag set to
allocate this memory.

That way we gain two things:
1. The memory cannot be freed while in use since it's protected
by refcount in driver.

2. No need to wake up the user thread upon each interrupt from CQ,
because the kernel has direct access to the counter. Therefore,
it can make comparison with the target value in the interrupt
handler and wake up the user thread only if the counter reaches the
target value. This is instead of waking the thread up to copy counter
value from user then go sleep again if target value wasn't reached.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 include/uapi/misc/habanalabs.h | 61 ++++++++++++++++++++++++++++++------------
 1 file changed, 44 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 648850b954a3..371dfc4243b3 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -680,7 +680,10 @@ struct hl_info_args {
 #define HL_MAX_CB_SIZE		(0x200000 - 32)
 
 /* Indicates whether the command buffer should be mapped to the device's MMU */
-#define HL_CB_FLAGS_MAP		0x1
+#define HL_CB_FLAGS_MAP			0x1
+
+/* Used with HL_CB_OP_INFO opcode to get the device va address for kernel mapped CB */
+#define HL_CB_FLAGS_GET_DEVICE_VA	0x2
 
 struct hl_cb_in {
 	/* Handle of CB or 0 if we want to create one */
@@ -702,11 +705,16 @@ struct hl_cb_out {
 		/* Handle of CB */
 		__u64 cb_handle;
 
-		/* Information about CB */
-		struct {
-			/* Usage count of CB */
-			__u32 usage_cnt;
-			__u32 pad;
+		union {
+			/* Information about CB */
+			struct {
+				/* Usage count of CB */
+				__u32 usage_cnt;
+				__u32 pad;
+			};
+
+			/* CB mapped address to device MMU */
+			__u64 device_va;
 		};
 	};
 };
@@ -947,9 +955,10 @@ union hl_cs_args {
 	struct hl_cs_out out;
 };
 
-#define HL_WAIT_CS_FLAGS_INTERRUPT	0x2
-#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000
-#define HL_WAIT_CS_FLAGS_MULTI_CS	0x4
+#define HL_WAIT_CS_FLAGS_INTERRUPT		0x2
+#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK		0xFFF00000
+#define HL_WAIT_CS_FLAGS_MULTI_CS		0x4
+#define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ	0x10
 
 #define HL_WAIT_MULTI_CS_LIST_MAX_LEN	32
 
@@ -969,14 +978,23 @@ struct hl_wait_cs_in {
 		};
 
 		struct {
-			/* User address for completion comparison.
-			 * upon interrupt, driver will compare the value pointed
-			 * by this address with the supplied target value.
-			 * in order not to perform any comparison, set address
-			 * to all 1s.
-			 * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set
-			 */
-			__u64 addr;
+			union {
+				/* User address for completion comparison.
+				 * upon interrupt, driver will compare the value pointed
+				 * by this address with the supplied target value.
+				 * in order not to perform any comparison, set address
+				 * to all 1s.
+				 * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set
+				 */
+				__u64 addr;
+
+				/* cq_counters_handle to a kernel mapped cb which contains
+				 * cq counters.
+				 * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set
+				 */
+				__u64 cq_counters_handle;
+			};
+
 			/* Target value for completion comparison */
 			__u64 target;
 		};
@@ -1004,6 +1022,15 @@ struct hl_wait_cs_in {
 		 */
 		__u64 interrupt_timeout_us;
 	};
+
+	/*
+	 * cq counter offset inside the counters cb pointed by cq_counters_handle above.
+	 * upon interrupt, driver will compare the value pointed
+	 * by this address (cq_counters_handle + cq_counters_offset)
+	 * with the supplied target value.
+	 * relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set
+	 */
+	__u64 cq_counters_offset;
 };
 
 #define HL_WAIT_CS_STATUS_COMPLETED	0
-- 
cgit v1.2.3


From 4fb0abfee424b05f0ec6d2d09e38f04ee2b82a8a Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Mon, 8 Nov 2021 15:51:21 -0600
Subject: x86/amd_nb: Add AMD Family 19h Models (10h-1Fh) and (A0h-AFh) PCI IDs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the new PCI Device IDs to support new generation of AMD 19h family of
processors.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Babu Moger <babu.moger@amd.com>
Acked-by: Krzysztof Wilczyński <kw@linux.com>
Acked-by: Borislav Petkov <bp@suse.de>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>  # pci_ids.h
Link: https://lore.kernel.org/r/163640828133.955062.18349019796157170473.stgit@bmoger-ubuntu
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 011f2f1ea5bb..b5248f27910e 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -555,6 +555,7 @@
 #define PCI_DEVICE_ID_AMD_17H_M60H_DF_F3 0x144b
 #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443
 #define PCI_DEVICE_ID_AMD_19H_DF_F3	0x1653
+#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F3 0x14b0
 #define PCI_DEVICE_ID_AMD_19H_M40H_DF_F3 0x167c
 #define PCI_DEVICE_ID_AMD_19H_M50H_DF_F3 0x166d
 #define PCI_DEVICE_ID_AMD_CNB17H_F3	0x1703
-- 
cgit v1.2.3


From 11a24ca7e34d968991a7d437b950d1924396bd81 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 25 Nov 2021 03:08:38 +0100
Subject: hwmon: (ntc_thermistor) Merge platform data into driver

Platform data is supposed to be used with "board files",
device descriptions in C. Since the introduction of the
NTC driver in 2011, no such platforms have been submitted
to the Linux kernel, and their use is strongly discouraged
in favor of Device Tree, ACPI or as last resort software
firmware nodes.

Drop the external header and copy the platform data into
the driver file.

Cc: Peter Rosin <peda@axentia.se>
Cc: Chris Lesiak <chris.lesiak@licor.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20211125020841.3616359-2-linus.walleij@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/linux/platform_data/ntc_thermistor.h | 50 ----------------------------
 1 file changed, 50 deletions(-)
 delete mode 100644 include/linux/platform_data/ntc_thermistor.h

(limited to 'include')

diff --git a/include/linux/platform_data/ntc_thermistor.h b/include/linux/platform_data/ntc_thermistor.h
deleted file mode 100644
index b324d03e580c..000000000000
--- a/include/linux/platform_data/ntc_thermistor.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * ntc_thermistor.h - NTC Thermistors
- *
- *  Copyright (C) 2010 Samsung Electronics
- *  MyungJoo Ham <myungjoo.ham@samsung.com>
- */
-#ifndef _LINUX_NTC_H
-#define _LINUX_NTC_H
-
-struct iio_channel;
-
-enum ntc_thermistor_type {
-	TYPE_B57330V2103,
-	TYPE_B57891S0103,
-	TYPE_NCPXXWB473,
-	TYPE_NCPXXWF104,
-	TYPE_NCPXXWL333,
-	TYPE_NCPXXXH103,
-};
-
-struct ntc_thermistor_platform_data {
-	/*
-	 * One (not both) of read_uV and read_ohm should be provided and only
-	 * one of the two should be provided.
-	 * Both functions should return negative value for an error case.
-	 *
-	 * pullup_uV, pullup_ohm, pulldown_ohm, and connect are required to use
-	 * read_uV()
-	 *
-	 * How to setup pullup_ohm, pulldown_ohm, and connect is
-	 * described at Documentation/hwmon/ntc_thermistor.rst
-	 *
-	 * pullup/down_ohm: 0 for infinite / not-connected
-	 *
-	 * chan: iio_channel pointer to communicate with the ADC which the
-	 * thermistor is using for conversion of the analog values.
-	 */
-	int (*read_uv)(struct ntc_thermistor_platform_data *);
-	unsigned int pullup_uv;
-
-	unsigned int pullup_ohm;
-	unsigned int pulldown_ohm;
-	enum { NTC_CONNECTED_POSITIVE, NTC_CONNECTED_GROUND } connect;
-	struct iio_channel *chan;
-
-	int (*read_ohm)(void);
-};
-
-#endif /* _LINUX_NTC_H */
-- 
cgit v1.2.3


From 130d168866a11829b844ffdb19b9aefe384f754c Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Thu, 16 Dec 2021 16:42:57 +0100
Subject: hwmon: prefix kernel-doc comments for structs with struct

The command ./scripts/kernel-doc -none include/linux/hwmon.h warns:

  include/linux/hwmon.h:406: warning: This comment starts with '/**', but
    isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
   * Channel information
  include/linux/hwmon.h:425: warning: This comment starts with '/**', but
    isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
   * Chip configuration

Address those kernel-doc warnings by prefixing kernel-doc descriptions for
structs with the keyword 'struct'.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Link: https://lore.kernel.org/r/20211216154257.26758-1-lukas.bulwahn@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/linux/hwmon.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 1e8d6ea8992e..fad1f1df26df 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -403,7 +403,7 @@ struct hwmon_ops {
 };
 
 /**
- * Channel information
+ * struct hwmon_channel_info - Channel information
  * @type:	Channel type.
  * @config:	Pointer to NULL-terminated list of channel parameters.
  *		Use for per-channel attributes.
@@ -422,7 +422,7 @@ struct hwmon_channel_info {
 	})
 
 /**
- * Chip configuration
+ * struct hwmon_chip_info - Chip configuration
  * @ops:	Pointer to hwmon operations.
  * @info:	Null-terminated list of channel information.
  */
-- 
cgit v1.2.3


From ee6d3dd4ed48ab24b74bab3c3977b8218518247d Mon Sep 17 00:00:00 2001
From: Wedson Almeida Filho <wedsonaf@google.com>
Date: Fri, 24 Dec 2021 23:13:45 +0000
Subject: driver core: make kobj_type constant.

This way instances of kobj_type (which contain function pointers) can be
stored in .rodata, which means that they cannot be [easily/accidentally]
modified at runtime.

Signed-off-by: Wedson Almeida Filho <wedsonaf@google.com>
Link: https://lore.kernel.org/r/20211224231345.777370-1-wedsonaf@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kobject.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index c740062b4b1a..683172b2e094 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -66,7 +66,7 @@ struct kobject {
 	struct list_head	entry;
 	struct kobject		*parent;
 	struct kset		*kset;
-	struct kobj_type	*ktype;
+	const struct kobj_type	*ktype;
 	struct kernfs_node	*sd; /* sysfs directory entry */
 	struct kref		kref;
 #ifdef CONFIG_DEBUG_KOBJECT_RELEASE
@@ -90,13 +90,13 @@ static inline const char *kobject_name(const struct kobject *kobj)
 	return kobj->name;
 }
 
-extern void kobject_init(struct kobject *kobj, struct kobj_type *ktype);
+extern void kobject_init(struct kobject *kobj, const struct kobj_type *ktype);
 extern __printf(3, 4) __must_check
 int kobject_add(struct kobject *kobj, struct kobject *parent,
 		const char *fmt, ...);
 extern __printf(4, 5) __must_check
 int kobject_init_and_add(struct kobject *kobj,
-			 struct kobj_type *ktype, struct kobject *parent,
+			 const struct kobj_type *ktype, struct kobject *parent,
 			 const char *fmt, ...);
 
 extern void kobject_del(struct kobject *kobj);
@@ -217,7 +217,7 @@ static inline void kset_put(struct kset *k)
 	kobject_put(&k->kobj);
 }
 
-static inline struct kobj_type *get_ktype(struct kobject *kobj)
+static inline const struct kobj_type *get_ktype(struct kobject *kobj)
 {
 	return kobj->ktype;
 }
-- 
cgit v1.2.3


From 253f06c7b1c1729b50e7ec52638e046239327bb1 Mon Sep 17 00:00:00 2001
From: Swapnil Jakhade <sjakhade@cadence.com>
Date: Thu, 23 Dec 2021 07:01:25 +0100
Subject: dt-bindings: phy: cadence-torrent: Rename SSC macros to use generic
 names

Rename SSC macros to use generic names instead of PHY specific names,
so that they can be used to specify SSC modes for both Torrent and
Sierra. Renaming the macros should not affect the things as these are
not being used in any DTS file yet.

Signed-off-by: Swapnil Jakhade <sjakhade@cadence.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20211223060137.9252-4-sjakhade@cadence.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/dt-bindings/phy/phy-cadence.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/phy/phy-cadence.h b/include/dt-bindings/phy/phy-cadence.h
index 24fdc9e11bd6..d55fe6e6b936 100644
--- a/include/dt-bindings/phy/phy-cadence.h
+++ b/include/dt-bindings/phy/phy-cadence.h
@@ -6,11 +6,11 @@
 #ifndef _DT_BINDINGS_CADENCE_SERDES_H
 #define _DT_BINDINGS_CADENCE_SERDES_H
 
-/* Torrent */
-#define TORRENT_SERDES_NO_SSC		0
-#define TORRENT_SERDES_EXTERNAL_SSC	1
-#define TORRENT_SERDES_INTERNAL_SSC	2
+#define CDNS_SERDES_NO_SSC		0
+#define CDNS_SERDES_EXTERNAL_SSC	1
+#define CDNS_SERDES_INTERNAL_SSC	2
 
+/* Torrent */
 #define CDNS_TORRENT_REFCLK_DRIVER      0
 #define CDNS_TORRENT_DERIVED_REFCLK	1
 #define CDNS_TORRENT_RECEIVED_REFCLK	2
-- 
cgit v1.2.3


From 637feefb8ac53fbe1147edb707b03dc09839fdf5 Mon Sep 17 00:00:00 2001
From: Swapnil Jakhade <sjakhade@cadence.com>
Date: Thu, 23 Dec 2021 07:01:36 +0100
Subject: dt-bindings: phy: cadence-sierra: Add clock ID for derived reference
 clock

Add clock ID for Sierra derived reference clock.

Signed-off-by: Swapnil Jakhade <sjakhade@cadence.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20211223060137.9252-15-sjakhade@cadence.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/dt-bindings/phy/phy-cadence.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/dt-bindings/phy/phy-cadence.h b/include/dt-bindings/phy/phy-cadence.h
index d55fe6e6b936..0671991208fc 100644
--- a/include/dt-bindings/phy/phy-cadence.h
+++ b/include/dt-bindings/phy/phy-cadence.h
@@ -18,5 +18,6 @@
 /* Sierra */
 #define CDNS_SIERRA_PLL_CMNLC		0
 #define CDNS_SIERRA_PLL_CMNLC1		1
+#define CDNS_SIERRA_DERIVED_REFCLK	2
 
 #endif /* _DT_BINDINGS_CADENCE_SERDES_H */
-- 
cgit v1.2.3


From 7175f02c4e5f5a9430113ab9ca0fd0ce98b28a51 Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@altlinux.org>
Date: Sun, 26 Dec 2021 16:01:27 +0300
Subject: uapi: fix linux/nfc.h userspace compilation errors

Replace sa_family_t with __kernel_sa_family_t to fix the following
linux/nfc.h userspace compilation errors:

/usr/include/linux/nfc.h:266:2: error: unknown type name 'sa_family_t'
  sa_family_t sa_family;
/usr/include/linux/nfc.h:274:2: error: unknown type name 'sa_family_t'
  sa_family_t sa_family;

Fixes: 23b7869c0fd0 ("NFC: add the NFC socket raw protocol")
Fixes: d646960f7986 ("NFC: Initial LLCP support")
Cc: <stable@vger.kernel.org>
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/nfc.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h
index f6e3c8c9c744..aadad43d943a 100644
--- a/include/uapi/linux/nfc.h
+++ b/include/uapi/linux/nfc.h
@@ -263,7 +263,7 @@ enum nfc_sdp_attr {
 #define NFC_SE_ENABLED  0x1
 
 struct sockaddr_nfc {
-	sa_family_t sa_family;
+	__kernel_sa_family_t sa_family;
 	__u32 dev_idx;
 	__u32 target_idx;
 	__u32 nfc_protocol;
@@ -271,7 +271,7 @@ struct sockaddr_nfc {
 
 #define NFC_LLCP_MAX_SERVICE_NAME 63
 struct sockaddr_nfc_llcp {
-	sa_family_t sa_family;
+	__kernel_sa_family_t sa_family;
 	__u32 dev_idx;
 	__u32 target_idx;
 	__u32 nfc_protocol;
-- 
cgit v1.2.3


From 79b69a83705e621b258ac6d8ae6d3bfdb4b930aa Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Sun, 26 Dec 2021 13:03:47 +0100
Subject: nfc: uapi: use kernel size_t to fix user-space builds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix user-space builds if it includes /usr/include/linux/nfc.h before
some of other headers:

  /usr/include/linux/nfc.h:281:9: error: unknown type name ‘size_t’
    281 |         size_t service_name_len;
        |         ^~~~~~

Fixes: d646960f7986 ("NFC: Initial LLCP support")
Cc: <stable@vger.kernel.org>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/nfc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h
index aadad43d943a..4fa4e979e948 100644
--- a/include/uapi/linux/nfc.h
+++ b/include/uapi/linux/nfc.h
@@ -278,7 +278,7 @@ struct sockaddr_nfc_llcp {
 	__u8 dsap; /* Destination SAP, if known */
 	__u8 ssap; /* Source SAP to be bound to */
 	char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */;
-	size_t service_name_len;
+	__kernel_size_t service_name_len;
 };
 
 /* NFC socket protocols */
-- 
cgit v1.2.3


From f81bdeaf816142e0729eea0cc84c395ec9673151 Mon Sep 17 00:00:00 2001
From: Mark Langsdorf <mlangsdo@redhat.com>
Date: Wed, 22 Dec 2021 16:57:34 +0100
Subject: ACPICA: actypes.h: Expand the ACPI_ACCESS_ definitions

ACPICA commit bc02c76d518135531483dfc276ed28b7ee632ce1

The current ACPI_ACCESS_*_WIDTH defines do not provide a way to
test that size is small enough to not cause an overflow when
applied to a 32-bit integer.

Rather than adding more magic numbers, add ACPI_ACCESS_*_SHIFT,
ACPI_ACCESS_*_MAX, and ACPI_ACCESS_*_DEFAULT #defines and
redefine ACPI_ACCESS_*_WIDTH in terms of the new #defines.

This was inititally reported on Linux where a size of 102 in
ACPI_ACCESS_BIT_WIDTH caused an overflow error in the SPCR
initialization code.

Link: https://github.com/acpica/acpica/commit/bc02c76d
Signed-off-by: Mark Langsdorf <mlangsdo@redhat.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actypes.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index ff8b3c913f21..248242dca28d 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -536,8 +536,14 @@ typedef u64 acpi_integer;
  * Can be used with access_width of struct acpi_generic_address and access_size of
  * struct acpi_resource_generic_register.
  */
-#define ACPI_ACCESS_BIT_WIDTH(size)     (1 << ((size) + 2))
-#define ACPI_ACCESS_BYTE_WIDTH(size)    (1 << ((size) - 1))
+#define ACPI_ACCESS_BIT_SHIFT		2
+#define ACPI_ACCESS_BYTE_SHIFT		-1
+#define ACPI_ACCESS_BIT_MAX		(31 - ACPI_ACCESS_BIT_SHIFT)
+#define ACPI_ACCESS_BYTE_MAX		(31 - ACPI_ACCESS_BYTE_SHIFT)
+#define ACPI_ACCESS_BIT_DEFAULT		(8 - ACPI_ACCESS_BIT_SHIFT)
+#define ACPI_ACCESS_BYTE_DEFAULT	(8 - ACPI_ACCESS_BYTE_SHIFT)
+#define ACPI_ACCESS_BIT_WIDTH(size)	(1 << ((size) + ACPI_ACCESS_BIT_SHIFT))
+#define ACPI_ACCESS_BYTE_WIDTH(size)	(1 << ((size) + ACPI_ACCESS_BYTE_SHIFT))
 
 /*******************************************************************************
  *
-- 
cgit v1.2.3


From ca25f92b72d25457653dbf2a81f322235804fb05 Mon Sep 17 00:00:00 2001
From: Jessica Clarke <jrtc27@jrtc27.com>
Date: Wed, 22 Dec 2021 17:21:25 +0100
Subject: ACPICA: Use original data_table_region pointer for accesses

ACPICA commit d9eb82bd7515989f0b29d79deeeb758db4d6529c

Currently the pointer to the table is cast to acpi_physical_address and
later cast back to a pointer to be dereferenced. Whether or not this is
supported is implementation-defined.

On CHERI, and thus Arm's experimental Morello prototype architecture,
pointers are represented as capabilities, which are unforgeable bounded
pointers, providing always-on fine-grained spatial memory safety. This
means that any pointer cast to a plain integer will lose all its
associated metadata, and when cast back to a pointer it will give a
null-derived pointer (one that has the same metadata as null but an
address equal to the integer) that will trap on any dereference. As a
result, this is an implementation where acpi_physical_address cannot be
used as a hack to store real pointers.

Thus, add a new field to struct acpi_object_region to store the pointer for
table regions, and propagate it to acpi_ex_data_table_space_handler via the
region context, to use a more portable implementation that supports
CHERI.

Link: https://github.com/acpica/acpica/commit/d9eb82bd
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actypes.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 248242dca28d..700c2449e85a 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -1221,6 +1221,10 @@ struct acpi_mem_space_context {
 	struct acpi_mem_mapping *first_mm;
 };
 
+struct acpi_data_table_space_context {
+	void *pointer;
+};
+
 /*
  * struct acpi_memory_list is used only if the ACPICA local cache is enabled
  */
-- 
cgit v1.2.3


From 5d6e59665d8bb0f8fa4d47726a93326f8ddfb448 Mon Sep 17 00:00:00 2001
From: Jessica Clarke <jrtc27@jrtc27.com>
Date: Wed, 22 Dec 2021 17:22:28 +0100
Subject: ACPICA: Use original pointer for virtual origin tables

ACPICA commit dfa3feffa8f760b686207d09dc880cd2f26c72af

Currently the pointer to the table is cast to acpi_physical_address and
later cast back to a pointer to be dereferenced. Whether or not this is
supported is implementation-defined.

On CHERI, and thus Arm's experimental Morello prototype architecture,
pointers are represented as capabilities, which are unforgeable bounded
pointers, providing always-on fine-grained spatial memory safety. This
means that any pointer cast to a plain integer will lose all its
associated metadata, and when cast back to a pointer it will give a
null-derived pointer (one that has the same metadata as null but an
address equal to the integer) that will trap on any dereference. As a
result, this is an implementation where acpi_physical_address cannot be
used as a hack to store real pointers.

Thus, alter the lifecycle of table descriptors. Internal physical tables
keep the current behaviour where only the address is set on install, and
the pointer is set on acquire. Virtual tables (internal and external)
now store the pointer on initialisation and use that on acquire (which
will redundantly set *table_ptr to itself, but changing that is both
unnecessary and overly complicated as acpi_tb_acquire_table is called with
both a pointer to a variable and a pointer to Table->Pointer itself).

This requires propagating the (possible) table pointer everywhere in
order to make sure pointers make it through to acpi_tb_acquire_temp_table,
which requires a change to the acpi_install_table interface. Instead of
taking an ACPI_PHYSADDR_TYPE and a boolean indicating whether it's
physical or virtual, it is now split into acpi_install_table (that takes
an external virtual table pointer) and acpi_install_physical_table (that
takes an ACPI_PHYSADDR_TYPE for an internal physical table address).
This also has the benefit of providing a cleaner API.

Link: https://github.com/acpica/acpica/commit/dfa3feff
Signed-off-by: Bob Moore <robert.moore@intel.com>
[ rjw: Adjust the code in tables.c to match interface changes ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpixf.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 73ba13914321..987bb0aa042e 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -454,9 +454,11 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status
  * ACPI table load/unload interfaces
  */
 ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
-			    acpi_install_table(acpi_physical_address address,
-					       u8 physical))
+			    acpi_install_table(struct acpi_table_header *table))
 
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
+			    acpi_install_physical_table(acpi_physical_address
+							address))
 ACPI_EXTERNAL_RETURN_STATUS(acpi_status
 			    acpi_load_table(struct acpi_table_header *table,
 					    u32 *table_idx))
-- 
cgit v1.2.3


From 339651be3704f336634d90c000a7778b86e1be99 Mon Sep 17 00:00:00 2001
From: Jessica Clarke <jrtc27@jrtc27.com>
Date: Wed, 22 Dec 2021 17:23:28 +0100
Subject: ACPICA: Macros: Remove ACPI_PHYSADDR_TO_PTR

ACPICA commit 52abebd410945ec55afb4dd8b7150e8a39b5c960

This macro was only ever used when stuffing pointers into physical
addresses and trying to later reconstruct the pointer, which is
implementation-defined as to whether that can be done. Now that all such
operations are gone, the macro is unused, and should be removed to avoid
such practices being reintroduced.

Link: https://github.com/acpica/acpica/commit/52abebd4
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actypes.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 700c2449e85a..83a7ee1fbd7c 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -509,7 +509,6 @@ typedef u64 acpi_integer;
 #define ACPI_TO_POINTER(i)              ACPI_CAST_PTR (void, (acpi_size) (i))
 #define ACPI_TO_INTEGER(p)              ACPI_PTR_DIFF (p, (void *) 0)
 #define ACPI_OFFSET(d, f)               ACPI_PTR_DIFF (&(((d *) 0)->f), (void *) 0)
-#define ACPI_PHYSADDR_TO_PTR(i)         ACPI_TO_POINTER(i)
 #define ACPI_PTR_TO_PHYSADDR(i)         ACPI_TO_INTEGER(i)
 
 /* Optimizations for 4-character (32-bit) acpi_name manipulation */
-- 
cgit v1.2.3


From e4a07f5acd730802ada629d52a29c4873f9826a4 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 22 Dec 2021 17:25:40 +0100
Subject: ACPICA: iASL/Disassembler: Additional support for NHLT table

ACPICA commit 0420852ffc520b81960e877852703b739c16025c

Added support for Vendor-defined microphone arrays and SNR
(signal-to-noise) extension.

Link: https://github.com/acpica/acpica/commit/0420852f
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 72 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 42 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 71ca090fd61b..380bff6bb2fd 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -1495,12 +1495,10 @@ struct acpi_nhlt_device_specific_config_a {
 
 /* Values for Config Type above */
 
-#define ACPI_NHLT_TYPE_MIC_ARRAY            0x01
-#define ACPI_NHLT_TYPE_GENERIC              0x00
-
-/* Mask for Extension field of array_type */
-
-#define ACPI_NHLT_ARRAY_TYPE_MASK           0x10
+#define ACPI_NHLT_CONFIG_TYPE_GENERIC              0x00
+#define ACPI_NHLT_CONFIG_TYPE_MIC_ARRAY            0x01
+#define ACPI_NHLT_CONFIG_TYPE_RENDER_FEEDBACK      0x03
+#define ACPI_NHLT_CONFIG_TYPE_RESERVED             0x04	/* 4 and above are reserved */
 
 struct acpi_nhlt_device_specific_config_b {
 	u32 capabilities_size;
@@ -1511,6 +1509,11 @@ struct acpi_nhlt_device_specific_config_c {
 	u8 virtual_slot;
 };
 
+struct acpi_nhlt_render_device_specific_config {
+	u32 capabilities_size;
+	u8 virtual_slot;
+};
+
 struct acpi_nhlt_wave_extensible {
 	u16 format_tag;
 	u16 channel_count;
@@ -1573,17 +1576,22 @@ struct acpi_nhlt_mic_device_specific_config {
 
 /* Values for array_type_ext above */
 
-#define SMALL_LINEAR_2ELEMENT               0x0A
-#define BIG_LINEAR_2ELEMENT                 0x0B
-#define FIRST_GEOMETRY_LINEAR_4ELEMENT      0x0C
-#define PLANAR_LSHAPED_4ELEMENT             0x0D
-#define SECOND_GEOMETRY_LINEAR_4ELEMENT     0x0E
-#define VENDOR_DEFINED                      0x0F
-#define ARRAY_TYPE_MASK                     0x0F
-#define ARRAY_TYPE_EXT_MASK                 0x10
+#define ACPI_NHLT_ARRAY_TYPE_RESERVED               0x09	// 9 and below are reserved
+#define ACPI_NHLT_SMALL_LINEAR_2ELEMENT             0x0A
+#define ACPI_NHLT_BIG_LINEAR_2ELEMENT               0x0B
+#define ACPI_NHLT_FIRST_GEOMETRY_LINEAR_4ELEMENT    0x0C
+#define ACPI_NHLT_PLANAR_LSHAPED_4ELEMENT           0x0D
+#define ACPI_NHLT_SECOND_GEOMETRY_LINEAR_4ELEMENT   0x0E
+#define ACPI_NHLT_VENDOR_DEFINED                    0x0F
+#define ACPI_NHLT_ARRAY_TYPE_MASK                   0x0F
+#define ACPI_NHLT_ARRAY_TYPE_EXT_MASK               0x10
+
+#define ACPI_NHLT_NO_EXTENSION                      0x0
+#define ACPI_NHLT_MIC_SNR_SENSITIVITY_EXT           (1<<4)
 
-#define NO_EXTENSION                        0x0
-#define MIC_SNR_SENSITIVITY_EXT             0x1
+struct acpi_nhlt_vendor_mic_count {
+	u8 microphone_count;
+};
 
 struct acpi_nhlt_vendor_mic_config {
 	u8 type;
@@ -1603,22 +1611,25 @@ struct acpi_nhlt_vendor_mic_config {
 
 /* Values for Type field above */
 
-#define MIC_OMNIDIRECTIONAL                 0
-#define MIC_SUBCARDIOID                     1
-#define MIC_CARDIOID                        2
-#define MIC_SUPER_CARDIOID                  3
-#define MIC_HYPER_CARDIOID                  4
-#define MIC_8_SHAPED                        5
-#define MIC_VENDOR_DEFINED                  7
+#define ACPI_NHLT_MIC_OMNIDIRECTIONAL       0
+#define ACPI_NHLT_MIC_SUBCARDIOID           1
+#define ACPI_NHLT_MIC_CARDIOID              2
+#define ACPI_NHLT_MIC_SUPER_CARDIOID        3
+#define ACPI_NHLT_MIC_HYPER_CARDIOID        4
+#define ACPI_NHLT_MIC_8_SHAPED              5
+#define ACPI_NHLT_MIC_RESERVED6             6	// 6 is reserved
+#define ACPI_NHLT_MIC_VENDOR_DEFINED        7
+#define ACPI_NHLT_MIC_RESERVED              8	// 8 and above are reserved
 
 /* Values for Panel field above */
 
-#define MIC_TOP                             0
-#define MIC_BOTTOM                          1
-#define MIC_LEFT                            2
-#define MIC_RIGHT                           3
-#define MIC_FRONT                           4
-#define MIC_REAR                            5
+#define ACPI_NHLT_MIC_POSITION_TOP          0
+#define ACPI_NHLT_MIC_POSITION_BOTTOM       1
+#define ACPI_NHLT_MIC_POSITION_LEFT         2
+#define ACPI_NHLT_MIC_POSITION_RIGHT        3
+#define ACPI_NHLT_MIC_POSITION_FRONT        4
+#define ACPI_NHLT_MIC_POSITION_BACK         5
+#define ACPI_NHLT_MIC_POSITION_RESERVED     6	// 6 and above are reserved
 
 struct acpi_nhlt_vendor_mic_device_specific_config {
 	struct acpi_nhlt_mic_device_specific_config mic_array_device_config;
@@ -1633,8 +1644,9 @@ struct acpi_nhlt_mic_snr_sensitivity_extension {
 	u32 sensitivity;
 };
 
+/* Render device with feedback */
+
 struct acpi_nhlt_render_feedback_device_specific_config {
-	struct acpi_nhlt_device_specific_config device_config;
 	u8 feedback_virtual_slot;	// render slot in case of capture
 	u16 feedback_channels;	// informative only
 	u16 feedback_valid_bits_per_sample;
-- 
cgit v1.2.3


From 00395b74d57ff81795ec392627db7e1764c94941 Mon Sep 17 00:00:00 2001
From: Shuuichirou Ishii <ishii.shuuichir@fujitsu.com>
Date: Wed, 22 Dec 2021 17:28:34 +0100
Subject: ACPICA: Fix AEST Processor generic resource substructure data field
 byte length

ACPICA commit 13b9327761955f6e1e5dbf748b3112940c0dc539

The byte length of the Data field in the AEST Processor generic resource
substructure defined in ACPI for the Armv8 RAS Extensions 1.1 is 4Byte.
However, it is defined as a pointer type, and on a 64-bit machine,
it is interpreted as 8 bytes. Therefore, it is changed from a pointer
type unsigned integer 1 byte to an unsigned integer 4 bytes.

Link: https://github.com/acpica/acpica/commit/13b93277
Signed-off-by: Shuuichirou Ishii <ishii.shuuichir@fujitsu.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 380bff6bb2fd..1b0fac6ffc3f 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -154,7 +154,7 @@ typedef struct acpi_aest_processor_tlb {
 /* 2R: Processor Generic Resource Substructure */
 
 typedef struct acpi_aest_processor_generic {
-	u8 *resource;
+	u32 resource;
 
 } acpi_aest_processor_generic;
 
-- 
cgit v1.2.3


From 0acf24ad7e10f547809faefb8069f8f5482eb4d9 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Wed, 22 Dec 2021 17:32:54 +0100
Subject: ACPICA: Add support for PCC Opregion special context data

ACPICA commit 55526e8a6133cbf5a9cc0fb75a95dbbac6eb98e6

PCC Opregion added in ACPIC 6.3 requires special context data similar
to GPIO and Generic Serial Bus as it needs to know the internal PCC
buffer and its length as well as the PCC channel index when the opregion
handler is being executed by the OSPM.

Lets add support for the special context data needed by PCC Opregion.

Link: https://github.com/acpica/acpica/commit/55526e8a
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actypes.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 83a7ee1fbd7c..69e89d572b9e 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -1103,6 +1103,14 @@ struct acpi_connection_info {
 	u8 access_length;
 };
 
+/* Special Context data for PCC Opregion (ACPI 6.3) */
+
+struct acpi_pcc_info {
+	u8 subspace_id;
+	u16 length;
+	u8 *internal_buffer;
+};
+
 typedef
 acpi_status (*acpi_adr_space_setup) (acpi_handle region_handle,
 				     u32 function,
-- 
cgit v1.2.3


From 2de6bb92ebbb0e8803554bb24391514b552cb481 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 22 Dec 2021 17:36:30 +0100
Subject: ACPICA: iASL: Add TDEL table to both compiler/disassembler

ACPICA commit 403f9965aba7ff9d2ed5b41bbffdd2a1ed0f596f

Added struct acpi_pcc_info to acpi_src.

Link: https://github.com/acpica/acpica/commit/403f9965
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 1b0fac6ffc3f..18b7b9138607 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -48,6 +48,7 @@
 #define ACPI_SIG_SDEI           "SDEI"	/* Software Delegated Exception Interface Table */
 #define ACPI_SIG_SDEV           "SDEV"	/* Secure Devices table */
 #define ACPI_SIG_SVKL           "SVKL"	/* Storage Volume Key Location Table */
+#define ACPI_SIG_TDEL           "TDEL"	/* TD Event Log Table */
 
 /*
  * All tables must be byte-packed to match the ACPI specification, since
@@ -2467,6 +2468,22 @@ enum acpi_svkl_format {
 	ACPI_SVKL_FORMAT_RESERVED = 1	/* 1 and greater are reserved */
 };
 
+/*******************************************************************************
+ *
+ * TDEL - TD-Event Log
+ *        From: "Guest-Host-Communication Interface (GHCI) for Intel
+ *        Trust Domain Extensions (Intel TDX)".
+ *        September 2020
+ *
+ ******************************************************************************/
+
+struct acpi_table_tdel {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u32 reserved;
+	u64 log_area_minimum_length;
+	u64 log_area_start_address;
+};
+
 /* Reset to default packing */
 
 #pragma pack()
-- 
cgit v1.2.3


From 5579649e7eb756a4e3d5784b6958374e5bfc41de Mon Sep 17 00:00:00 2001
From: Ilkka Koskinen <ilkka@os.amperecomputing.com>
Date: Wed, 22 Dec 2021 17:37:21 +0100
Subject: ACPICA: iASL: Add suppport for AGDI table

ACPICA commit cf36a6d658ca5aa8c329c2edfc3322c095ffd844

Add support for Arm Generic Diagnostic Dump and Reset Interface, which is
described by "ACPI for Arm Components 1.1 Platform Design Document"
ARM DEN0093.

Add the necessary types in the ACPICA header files and support for
compiling and decompiling the table.

Link: https://github.com/acpica/acpica/commit/cf36a6d6
Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 18b7b9138607..57481f6f1ca5 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -24,6 +24,7 @@
  * file. Useful because they make it more difficult to inadvertently type in
  * the wrong signature.
  */
+#define ACPI_SIG_AGDI           "AGDI"	/* Arm Generic Diagnostic Dump and Reset Device Interface */
 #define ACPI_SIG_BDAT           "BDAT"	/* BIOS Data ACPI Table */
 #define ACPI_SIG_IORT           "IORT"	/* IO Remapping Table */
 #define ACPI_SIG_IVRS           "IVRS"	/* I/O Virtualization Reporting Structure */
@@ -238,6 +239,25 @@ typedef struct acpi_aest_node_interrupt {
 #define ACPI_AEST_NODE_ERROR_RECOVERY       1
 #define ACPI_AEST_XRUPT_RESERVED            2	/* 2 and above are reserved */
 
+/*******************************************************************************
+ * AGDI - Arm Generic Diagnostic Dump and Reset Device Interface
+ *
+ * Conforms to "ACPI for Arm Components 1.1, Platform Design Document"
+ * ARM DEN0093 v1.1
+ *
+ ******************************************************************************/
+struct acpi_table_agdi {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u8 flags;
+	u8 reserved[3];
+	u32 sdei_event;
+	u32 gsiv;
+};
+
+/* Mask for Flags field above */
+
+#define ACPI_AGDI_SIGNALING_MODE (1)
+
 /*******************************************************************************
  *
  * BDAT - BIOS Data ACPI Table
-- 
cgit v1.2.3


From 0c9a672729d62d27d0c9993e106707be0b0c2bc0 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 22 Dec 2021 17:37:56 +0100
Subject: ACPICA: iASL/NHLT table: "Specific Data" field support

ACPICA commit 26f8c721fb01e4a26eec8c85dffcbe950d5e61a9

Add support for optional "Specific Data" field for the optional
Linux-specific structure that appears at the end of an Endpoint
Descriptor.

Link: https://github.com/acpica/acpica/commit/26f8c721
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 57481f6f1ca5..16847c8d9d5f 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -1683,7 +1683,10 @@ struct acpi_nhlt_linux_specific_data {
 	u8 device_id[16];
 	u8 device_instance_id;
 	u8 device_port_id;
-	u8 filler[18];
+};
+
+struct acpi_nhlt_linux_specific_data_b {
+	u8 specific_data[18];
 };
 
 struct acpi_nhlt_table_terminator {
-- 
cgit v1.2.3


From c95545a036705e1a78b40a83701c8d3868898114 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 22 Dec 2021 17:38:29 +0100
Subject: ACPICA: Update version to 20211217

ACPICA commit 90088defcb99e122edf41038ae5c901206c86dc9

Version 20211217.

Link: https://github.com/acpica/acpica/commit/90088def
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpixf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 987bb0aa042e..7417731472b7 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -12,7 +12,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20210930
+#define ACPI_CA_VERSION                 0x20211217
 
 #include <acpi/acconfig.h>
 #include <acpi/actypes.h>
-- 
cgit v1.2.3


From 1882de7fc56c2b0ea91dd9fd9922d434fc3feb15 Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Wed, 22 Dec 2021 12:31:03 +0800
Subject: efi: Introduce EFI_FIRMWARE_MANAGEMENT_CAPSULE_HEADER and
 corresponding structures

Platform Firmware Runtime Update image starts with UEFI headers, and the
headers are defined in UEFI specification, but some of them have not been
defined in the kernel yet.

For example, the header layout of a capsule file looks like this:

EFI_CAPSULE_HEADER
EFI_FIRMWARE_MANAGEMENT_CAPSULE_HEADER
EFI_FIRMWARE_MANAGEMENT_CAPSULE_IMAGE_HEADER
EFI_FIRMWARE_IMAGE_AUTHENTICATION

These structures would be used by the Platform Firmware Runtime Update
driver to parse the format of capsule file to verify if the corresponding
version number is valid. In this way, if the user provides an invalid
capsule image, the kernel could be used as a guard to reject it, without
switching to the Management Mode (which might be costly).

EFI_CAPSULE_HEADER has been defined in the kernel, but the other
structures have not been defined yet, so do that. Besides,
EFI_FIRMWARE_MANAGEMENT_CAPSULE_HEADER and
EFI_FIRMWARE_MANAGEMENT_CAPSULE_IMAGE_HEADER are required to be packed
in the uefi specification. For this reason, use the __packed attribute
to indicate to the compiler that the entire structure can appear
misaligned in memory (as suggested by Ard) in case one of them follows
the other directly in a capsule header.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/efi.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

(limited to 'include')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index dbd39b20e034..80e970f7e6f8 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -148,6 +148,52 @@ typedef struct {
 	u32 imagesize;
 } efi_capsule_header_t;
 
+/* EFI_FIRMWARE_MANAGEMENT_CAPSULE_HEADER */
+struct efi_manage_capsule_header {
+	u32 ver;
+	u16 emb_drv_cnt;
+	u16 payload_cnt;
+	/*
+	 * Variable-size array of the size given by the sum of
+	 * emb_drv_cnt and payload_cnt.
+	 */
+	u64 offset_list[];
+} __packed;
+
+/* EFI_FIRMWARE_MANAGEMENT_CAPSULE_IMAGE_HEADER */
+struct efi_manage_capsule_image_header {
+	u32 ver;
+	efi_guid_t image_type_id;
+	u8 image_index;
+	u8 reserved_bytes[3];
+	u32 image_size;
+	u32 vendor_code_size;
+	/* hw_ins was introduced in version 2 */
+	u64 hw_ins;
+	/* capsule_support was introduced in version 3 */
+	u64 capsule_support;
+} __packed;
+
+/* WIN_CERTIFICATE */
+struct win_cert {
+	u32 len;
+	u16 rev;
+	u16 cert_type;
+};
+
+/* WIN_CERTIFICATE_UEFI_GUID */
+struct win_cert_uefi_guid {
+	struct win_cert	hdr;
+	efi_guid_t cert_type;
+	u8 cert_data[];
+};
+
+/* EFI_FIRMWARE_IMAGE_AUTHENTICATION */
+struct efi_image_auth {
+	u64 mon_count;
+	struct win_cert_uefi_guid auth_info;
+};
+
 /*
  * EFI capsule flags
  */
-- 
cgit v1.2.3


From 0db89fa243e5edc5de38c88b369e4c3755c5fb74 Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Wed, 22 Dec 2021 12:31:41 +0800
Subject: ACPI: Introduce Platform Firmware Runtime Update device driver

Introduce the pfr_update driver which can be used for Platform Firmware
Runtime code injection and driver update [1].

The user is expected to provide the EFI capsule, and pass it to the
driver by writing the capsule to a device special file. The capsule
is transferred by the driver to the platform firmware with the help
of an ACPI _DSM method under the special ACPI Platform Firmware
Runtime Update device (INTC1080), and the actual firmware update is
carried out by the low-level Management Mode code in the platform
firmware.

This change allows certain pieces of the platform firmware to be
updated on the fly while the system is running (runtime) without the
need to restart it, which is key in the cases when the system needs to
be available 100% of the time and it cannot afford the downtime related
to restarting it, or when the work carried out by the system is
particularly important, so it cannot be interrupted, and it is not
practical to wait until it is complete.

Link: https://uefi.org/sites/default/files/resources/Intel_MM_OS_Interface_Spec_Rev100.pdf # [1]
Tested-by: Hongyu Ning <hongyu.ning@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/uapi/linux/pfrut.h | 174 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 174 insertions(+)
 create mode 100644 include/uapi/linux/pfrut.h

(limited to 'include')

diff --git a/include/uapi/linux/pfrut.h b/include/uapi/linux/pfrut.h
new file mode 100644
index 000000000000..fa97e80a93b7
--- /dev/null
+++ b/include/uapi/linux/pfrut.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Platform Firmware Runtime Update header
+ *
+ * Copyright(c) 2021 Intel Corporation. All rights reserved.
+ */
+#ifndef __PFRUT_H__
+#define __PFRUT_H__
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define PFRUT_IOCTL_MAGIC 0xEE
+
+/**
+ * PFRU_IOC_SET_REV - _IOW(PFRUT_IOCTL_MAGIC, 0x01, unsigned int)
+ *
+ * Return:
+ * * 0			- success
+ * * -EFAULT		- fail to read the revision id
+ * * -EINVAL		- user provides an invalid revision id
+ *
+ * Set the Revision ID for Platform Firmware Runtime Update.
+ */
+#define PFRU_IOC_SET_REV _IOW(PFRUT_IOCTL_MAGIC, 0x01, unsigned int)
+
+/**
+ * PFRU_IOC_STAGE - _IOW(PFRUT_IOCTL_MAGIC, 0x02, unsigned int)
+ *
+ * Return:
+ * * 0			- success
+ * * -EINVAL		- stage phase returns invalid result
+ *
+ * Stage a capsule image from communication buffer and perform authentication.
+ */
+#define PFRU_IOC_STAGE _IOW(PFRUT_IOCTL_MAGIC, 0x02, unsigned int)
+
+/**
+ * PFRU_IOC_ACTIVATE - _IOW(PFRUT_IOCTL_MAGIC, 0x03, unsigned int)
+ *
+ * Return:
+ * * 0			- success
+ * * -EINVAL		- activate phase returns invalid result
+ *
+ * Activate a previously staged capsule image.
+ */
+#define PFRU_IOC_ACTIVATE _IOW(PFRUT_IOCTL_MAGIC, 0x03, unsigned int)
+
+/**
+ * PFRU_IOC_STAGE_ACTIVATE - _IOW(PFRUT_IOCTL_MAGIC, 0x04, unsigned int)
+ *
+ * Return:
+ * * 0			- success
+ * * -EINVAL		- stage/activate phase returns invalid result.
+ *
+ * Perform both stage and activation action.
+ */
+#define PFRU_IOC_STAGE_ACTIVATE _IOW(PFRUT_IOCTL_MAGIC, 0x04, unsigned int)
+
+/**
+ * PFRU_IOC_QUERY_CAP - _IOR(PFRUT_IOCTL_MAGIC, 0x05,
+ *			     struct pfru_update_cap_info)
+ *
+ * Return:
+ * * 0			- success
+ * * -EINVAL		- query phase returns invalid result
+ * * -EFAULT		- the result fails to be copied to userspace
+ *
+ * Retrieve information on the Platform Firmware Runtime Update capability.
+ * The information is a struct pfru_update_cap_info.
+ */
+#define PFRU_IOC_QUERY_CAP _IOR(PFRUT_IOCTL_MAGIC, 0x05, struct pfru_update_cap_info)
+
+/**
+ * struct pfru_payload_hdr - Capsule file payload header.
+ *
+ * @sig: Signature of this capsule file.
+ * @hdr_version: Revision of this header structure.
+ * @hdr_size: Size of this header, including the OemHeader bytes.
+ * @hw_ver: The supported firmware version.
+ * @rt_ver: Version of the code injection image.
+ * @platform_id: A platform specific GUID to specify the platform what
+ *               this capsule image support.
+ */
+struct pfru_payload_hdr {
+	__u32 sig;
+	__u32 hdr_version;
+	__u32 hdr_size;
+	__u32 hw_ver;
+	__u32 rt_ver;
+	__u8 platform_id[16];
+};
+
+enum pfru_dsm_status {
+	DSM_SUCCEED = 0,
+	DSM_FUNC_NOT_SUPPORT = 1,
+	DSM_INVAL_INPUT = 2,
+	DSM_HARDWARE_ERR = 3,
+	DSM_RETRY_SUGGESTED = 4,
+	DSM_UNKNOWN = 5,
+	DSM_FUNC_SPEC_ERR = 6,
+};
+
+/**
+ * struct pfru_update_cap_info - Runtime update capability information.
+ *
+ * @status: Indicator of whether this query succeed.
+ * @update_cap: Bitmap to indicate whether the feature is supported.
+ * @code_type: A buffer containing an image type GUID.
+ * @fw_version: Platform firmware version.
+ * @code_rt_version: Code injection runtime version for anti-rollback.
+ * @drv_type: A buffer containing an image type GUID.
+ * @drv_rt_version: The version of the driver update runtime code.
+ * @drv_svn: The secure version number(SVN) of the driver update runtime code.
+ * @platform_id: A buffer containing a platform ID GUID.
+ * @oem_id: A buffer containing an OEM ID GUID.
+ * @oem_info_len: Length of the buffer containing the vendor specific information.
+ */
+struct pfru_update_cap_info {
+	__u32 status;
+	__u32 update_cap;
+
+	__u8 code_type[16];
+	__u32 fw_version;
+	__u32 code_rt_version;
+
+	__u8 drv_type[16];
+	__u32 drv_rt_version;
+	__u32 drv_svn;
+
+	__u8 platform_id[16];
+	__u8 oem_id[16];
+
+	__u32 oem_info_len;
+};
+
+/**
+ * struct pfru_com_buf_info - Communication buffer information.
+ *
+ * @status: Indicator of whether this query succeed.
+ * @ext_status: Implementation specific query result.
+ * @addr_lo: Low 32bit physical address of the communication buffer to hold
+ *           a runtime update package.
+ * @addr_hi: High 32bit physical address of the communication buffer to hold
+ *           a runtime update package.
+ * @buf_size: Maximum size in bytes of the communication buffer.
+ */
+struct pfru_com_buf_info {
+	__u32 status;
+	__u32 ext_status;
+	__u64 addr_lo;
+	__u64 addr_hi;
+	__u32 buf_size;
+};
+
+/**
+ * struct pfru_updated_result - Platform firmware runtime update result information.
+ * @status: Indicator of whether this update succeed.
+ * @ext_status: Implementation specific update result.
+ * @low_auth_time: Low 32bit value of image authentication time in nanosecond.
+ * @high_auth_time: High 32bit value of image authentication time in nanosecond.
+ * @low_exec_time: Low 32bit value of image execution time in nanosecond.
+ * @high_exec_time: High 32bit value of image execution time in nanosecond.
+ */
+struct pfru_updated_result {
+	__u32 status;
+	__u32 ext_status;
+	__u64 low_auth_time;
+	__u64 high_auth_time;
+	__u64 low_exec_time;
+	__u64 high_exec_time;
+};
+
+#endif /* __PFRUT_H__ */
-- 
cgit v1.2.3


From b0013e037a8b07772c74ce24f1ae4743b30fc3cf Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Wed, 22 Dec 2021 12:32:02 +0800
Subject: ACPI: Introduce Platform Firmware Runtime Telemetry driver

This driver allows user space to fetch telemetry data from the
firmware with the help of the Platform Firmware Runtime Telemetry
interface.

Both PFRU and PFRT are based on ACPI _DSM interfaces located under
special device objects in the ACPI Namespace, but these interfaces
are different from each other, so it is better to provide a separate
driver from each of them, even though they share some common
definitions and naming conventions.

Tested-by: Hongyu Ning <hongyu.ning@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/uapi/linux/pfrut.h | 88 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/pfrut.h b/include/uapi/linux/pfrut.h
index fa97e80a93b7..42fa15f8310d 100644
--- a/include/uapi/linux/pfrut.h
+++ b/include/uapi/linux/pfrut.h
@@ -171,4 +171,92 @@ struct pfru_updated_result {
 	__u64 high_exec_time;
 };
 
+/**
+ * struct pfrt_log_data_info - Log Data from telemetry service.
+ * @status: Indicator of whether this update succeed.
+ * @ext_status: Implementation specific update result.
+ * @chunk1_addr_lo: Low 32bit physical address of the telemetry data chunk1
+ *                  starting address.
+ * @chunk1_addr_hi: High 32bit physical address of the telemetry data chunk1
+ *                  starting address.
+ * @chunk2_addr_lo: Low 32bit physical address of the telemetry data chunk2
+ *                  starting address.
+ * @chunk2_addr_hi: High 32bit physical address of the telemetry data chunk2
+ *                  starting address.
+ * @max_data_size: Maximum supported size of data of all data chunks combined.
+ * @chunk1_size: Data size in bytes of the telemetry data chunk1 buffer.
+ * @chunk2_size: Data size in bytes of the telemetry data chunk2 buffer.
+ * @rollover_cnt: Number of times telemetry data buffer is overwritten
+ *                since telemetry buffer reset.
+ * @reset_cnt: Number of times telemetry services resets that results in
+ *             rollover count and data chunk buffers are reset.
+ */
+struct pfrt_log_data_info {
+	__u32 status;
+	__u32 ext_status;
+	__u64 chunk1_addr_lo;
+	__u64 chunk1_addr_hi;
+	__u64 chunk2_addr_lo;
+	__u64 chunk2_addr_hi;
+	__u32 max_data_size;
+	__u32 chunk1_size;
+	__u32 chunk2_size;
+	__u32 rollover_cnt;
+	__u32 reset_cnt;
+};
+
+/**
+ * struct pfrt_log_info - Telemetry log information.
+ * @log_level: The telemetry log level.
+ * @log_type: The telemetry log type(history and execution).
+ * @log_revid: The telemetry log revision id.
+ */
+struct pfrt_log_info {
+	__u32 log_level;
+	__u32 log_type;
+	__u32 log_revid;
+};
+
+/**
+ * PFRT_LOG_IOC_SET_INFO - _IOW(PFRUT_IOCTL_MAGIC, 0x06,
+ *				struct pfrt_log_info)
+ *
+ * Return:
+ * * 0			- success
+ * * -EFAULT		- fail to get the setting parameter
+ * * -EINVAL		- fail to set the log level
+ *
+ * Set the PFRT log level and log type. The input information is
+ * a struct pfrt_log_info.
+ */
+#define PFRT_LOG_IOC_SET_INFO _IOW(PFRUT_IOCTL_MAGIC, 0x06, struct pfrt_log_info)
+
+/**
+ * PFRT_LOG_IOC_GET_INFO - _IOR(PFRUT_IOCTL_MAGIC, 0x07,
+ *				struct pfrt_log_info)
+ *
+ * Return:
+ * * 0			- success
+ * * -EINVAL		- fail to get the log level
+ * * -EFAULT		- fail to copy the result back to userspace
+ *
+ * Retrieve log level and log type of the telemetry. The information is
+ * a struct pfrt_log_info.
+ */
+#define PFRT_LOG_IOC_GET_INFO _IOR(PFRUT_IOCTL_MAGIC, 0x07, struct pfrt_log_info)
+
+/**
+ * PFRT_LOG_IOC_GET_DATA_INFO - _IOR(PFRUT_IOCTL_MAGIC, 0x08,
+ *				     struct pfrt_log_data_info)
+ *
+ * Return:
+ * * 0			- success
+ * * -EINVAL		- fail to get the log buffer information
+ * * -EFAULT		- fail to copy the log buffer information to userspace
+ *
+ * Retrieve data information about the telemetry. The information
+ * is a struct pfrt_log_data_info.
+ */
+#define PFRT_LOG_IOC_GET_DATA_INFO _IOR(PFRUT_IOCTL_MAGIC, 0x08, struct pfrt_log_data_info)
+
 #endif /* __PFRUT_H__ */
-- 
cgit v1.2.3


From cf6299b6101903c31bddb0065804b2121ed510c7 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 27 Dec 2021 17:39:24 +0100
Subject: kobject: remove kset from struct kset_uevent_ops callbacks

There is no need to pass the pointer to the kset in the struct
kset_uevent_ops callbacks as no one uses it, so just remove that pointer
entirely.

Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Wedson Almeida Filho <wedsonaf@google.com>
Link: https://lore.kernel.org/r/20211227163924.3970661-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kobject.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 683172b2e094..ad90b49824dc 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -153,10 +153,9 @@ struct kobj_uevent_env {
 };
 
 struct kset_uevent_ops {
-	int (* const filter)(struct kset *kset, struct kobject *kobj);
-	const char *(* const name)(struct kset *kset, struct kobject *kobj);
-	int (* const uevent)(struct kset *kset, struct kobject *kobj,
-		      struct kobj_uevent_env *env);
+	int (* const filter)(struct kobject *kobj);
+	const char *(* const name)(struct kobject *kobj);
+	int (* const uevent)(struct kobject *kobj, struct kobj_uevent_env *env);
 };
 
 struct kobj_attribute {
-- 
cgit v1.2.3


From d6b9c679bbac1d1d2fcac64391b4cadb91763a6f Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 12 Nov 2021 14:46:32 -0800
Subject: watchdog: bcm7038_wdt: Support platform data configuration

The BCM7038 watchdog driver needs to be able to obtain a specific clock
name on BCM63xx platforms which is the "periph" clock ticking at 50MHz.
make it possible to specify the clock name to obtain via platform data.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20211112224636.395101-4-f.fainelli@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 include/linux/platform_data/bcm7038_wdt.h | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 include/linux/platform_data/bcm7038_wdt.h

(limited to 'include')

diff --git a/include/linux/platform_data/bcm7038_wdt.h b/include/linux/platform_data/bcm7038_wdt.h
new file mode 100644
index 000000000000..e18cfd9ec8f9
--- /dev/null
+++ b/include/linux/platform_data/bcm7038_wdt.h
@@ -0,0 +1,8 @@
+#ifndef __BCM7038_WDT_PDATA_H
+#define __BCM7038_WDT_PDATA_H
+
+struct bcm7038_wdt_platform_data {
+	const char *clk_name;
+};
+
+#endif /* __BCM7038_WDT_PDATA_H */
-- 
cgit v1.2.3


From db3c65bc3a1308db8c914b2bf477b5a36005c3d3 Mon Sep 17 00:00:00 2001
From: Michael Kelley <mikelley@microsoft.com>
Date: Mon, 27 Dec 2021 19:31:54 -0800
Subject: Drivers: hv: Fix definition of hypercall input & output arg variables

The percpu variables hyperv_pcpu_input_arg and hyperv_pcpu_output_arg
have been incorrectly defined since their inception.  The __percpu
qualifier should be associated with the void * (i.e., a pointer), not
with the target of the pointer. This distinction makes no difference
to gcc and the generated code, but sparse correctly complains.  Fix
the definitions in the interest of general correctness in addition
to making sparse happy.

No functional change.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/1640662315-22260-1-git-send-email-mikelley@microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 include/asm-generic/mshyperv.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 94e73ba129c5..c08758b6b364 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -49,8 +49,8 @@ struct ms_hyperv_info {
 };
 extern struct ms_hyperv_info ms_hyperv;
 
-extern void  __percpu  **hyperv_pcpu_input_arg;
-extern void  __percpu  **hyperv_pcpu_output_arg;
+extern void * __percpu *hyperv_pcpu_input_arg;
+extern void * __percpu *hyperv_pcpu_output_arg;
 
 extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
 extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
-- 
cgit v1.2.3


From b92e301633f0f454aa1cfedac2e096bb9649b367 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Sat, 18 Dec 2021 16:25:53 +0100
Subject: mfd: ntxec: Change return type of ntxec_reg8 from __be16 to u16
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Register values in NTXEC are big-endian on the I2C bus, but the regmap
subsystem handles the conversion between CPU-endian and big-endian data
internally. ntxec_reg8 should thus return u16, not __be16.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211218152553.744615-1-j.neuschaefer@gmx.net
---
 include/linux/mfd/ntxec.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mfd/ntxec.h b/include/linux/mfd/ntxec.h
index 26ab3b8eb612..cc6f07bfa2b3 100644
--- a/include/linux/mfd/ntxec.h
+++ b/include/linux/mfd/ntxec.h
@@ -26,7 +26,7 @@ struct ntxec {
  * This convenience function converts an 8-bit value to 16-bit for use in the
  * second kind of register.
  */
-static inline __be16 ntxec_reg8(u8 value)
+static inline u16 ntxec_reg8(u8 value)
 {
 	return value << 8;
 }
-- 
cgit v1.2.3


From b6459415b384cb829f0b2a4268f211c789f6cf0b Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 28 Dec 2021 16:49:13 -0800
Subject: net: Don't include filter.h from net/sock.h

sock.h is pretty heavily used (5k objects rebuilt on x86 after
it's touched). We can drop the include of filter.h from it and
add a forward declaration of struct sk_filter instead.
This decreases the number of rebuilt objects when bpf.h
is touched from ~5k to ~1k.

There's a lot of missing includes this was masking. Primarily
in networking tho, this time.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Marc Kleine-Budde <mkl@pengutronix.de>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lore.kernel.org/bpf/20211229004913.513372-1-kuba@kernel.org
---
 include/linux/bpf_local_storage.h | 1 +
 include/linux/dsa/loop.h          | 1 +
 include/net/ipv6.h                | 2 ++
 include/net/route.h               | 1 +
 include/net/sock.h                | 2 +-
 include/net/xdp_sock.h            | 1 +
 6 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 24496bc28e7b..a2b625960ffe 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -8,6 +8,7 @@
 #define _BPF_LOCAL_STORAGE_H
 
 #include <linux/bpf.h>
+#include <linux/filter.h>
 #include <linux/rculist.h>
 #include <linux/list.h>
 #include <linux/hash.h>
diff --git a/include/linux/dsa/loop.h b/include/linux/dsa/loop.h
index 5a3470bcc8a7..b8fef35591aa 100644
--- a/include/linux/dsa/loop.h
+++ b/include/linux/dsa/loop.h
@@ -2,6 +2,7 @@
 #ifndef DSA_LOOP_H
 #define DSA_LOOP_H
 
+#include <linux/if_vlan.h>
 #include <linux/types.h>
 #include <linux/ethtool.h>
 #include <net/dsa.h>
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 53ac7707ca70..3afcb128e064 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -21,6 +21,8 @@
 #include <net/snmp.h>
 #include <net/netns/hash.h>
 
+struct ip_tunnel_info;
+
 #define SIN6_LEN_RFC2133	24
 
 #define IPV6_MAXPLEN		65535
diff --git a/include/net/route.h b/include/net/route.h
index 2e6c0e153e3a..4c858dcf1aa8 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -43,6 +43,7 @@
 #define RT_CONN_FLAGS(sk)   (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
 #define RT_CONN_FLAGS_TOS(sk,tos)   (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE))
 
+struct ip_tunnel_info;
 struct fib_nh;
 struct fib_info;
 struct uncached_list;
diff --git a/include/net/sock.h b/include/net/sock.h
index 37f878564d25..40f6406b9ca5 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -56,7 +56,6 @@
 #include <linux/wait.h>
 #include <linux/cgroup-defs.h>
 #include <linux/rbtree.h>
-#include <linux/filter.h>
 #include <linux/rculist_nulls.h>
 #include <linux/poll.h>
 #include <linux/sockptr.h>
@@ -249,6 +248,7 @@ struct sock_common {
 };
 
 struct bpf_local_storage;
+struct sk_filter;
 
 /**
   *	struct sock - network layer representation of sockets
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index fff069d2ed1b..3057e1a4a11c 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -6,6 +6,7 @@
 #ifndef _LINUX_XDP_SOCK_H
 #define _LINUX_XDP_SOCK_H
 
+#include <linux/bpf.h>
 #include <linux/workqueue.h>
 #include <linux/if_xdp.h>
 #include <linux/mutex.h>
-- 
cgit v1.2.3


From 1bb412d46ca9df90a3fe4b704f5385f03621455d Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 29 Dec 2021 16:09:37 +0800
Subject: net_tstamp: define new flag HWTSTAMP_FLAG_BONDED_PHC_INDEX

As we defined the new hwtstamp_config flag HWTSTAMP_FLAG_BONDED_PHC_INDEX
as enum, it's not easy for userspace program to check if the flag is
supported when build.

Let's define the new flag so user space could build it on old kernel with
ifdef check.

Fixes: 9c9211a3fc7a ("net_tstamp: add new flag HWTSTAMP_FLAG_BONDED_PHC_INDEX")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/net_tstamp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index e258e52cfd1f..55501e5e7ac8 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -87,6 +87,7 @@ enum hwtstamp_flags {
 	 * will be the PHC index.
 	 */
 	HWTSTAMP_FLAG_BONDED_PHC_INDEX = (1<<0),
+#define HWTSTAMP_FLAG_BONDED_PHC_INDEX	HWTSTAMP_FLAG_BONDED_PHC_INDEX
 
 	HWTSTAMP_FLAG_LAST = HWTSTAMP_FLAG_BONDED_PHC_INDEX,
 	HWTSTAMP_FLAG_MASK = (HWTSTAMP_FLAG_LAST - 1) | HWTSTAMP_FLAG_LAST
-- 
cgit v1.2.3


From 0fe4b381a59ebc53522fce579b281a67a9e1bee6 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@kernel.org>
Date: Fri, 24 Dec 2021 15:29:15 +0000
Subject: bpf: Allow bpf_local_storage to be used by sleepable programs

Other maps like hashmaps are already available to sleepable programs.
Sleepable BPF programs run under trace RCU. Allow task, sk and inode
storage to be used from sleepable programs. This allows sleepable and
non-sleepable programs to provide shareable annotations on kernel
objects.

Sleepable programs run in trace RCU where as non-sleepable programs run
in a normal RCU critical section i.e.  __bpf_prog_enter{_sleepable}
and __bpf_prog_exit{_sleepable}) (rcu_read_lock or rcu_read_lock_trace).

In order to make the local storage maps accessible to both sleepable
and non-sleepable programs, one needs to call both
call_rcu_tasks_trace and call_rcu to wait for both trace and classical
RCU grace periods to expire before freeing memory.

Paul's work on call_rcu_tasks_trace allows us to have per CPU queueing
for call_rcu_tasks_trace. This behaviour can be achieved by setting
rcupdate.rcu_task_enqueue_lim=<num_cpus> boot parameter.

In light of these new performance changes and to keep the local storage
code simple, avoid adding a new flag for sleepable maps / local storage
to select the RCU synchronization (trace / classical).

Also, update the dereferencing of the pointers to use
rcu_derference_check (with either the trace or normal RCU locks held)
with a common bpf_rcu_lock_held helper method.

Signed-off-by: KP Singh <kpsingh@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20211224152916.1550677-2-kpsingh@kernel.org
---
 include/linux/bpf_local_storage.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index a2b625960ffe..37b3906af8b1 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -17,6 +17,9 @@
 
 #define BPF_LOCAL_STORAGE_CACHE_SIZE	16
 
+#define bpf_rcu_lock_held()                                                    \
+	(rcu_read_lock_held() || rcu_read_lock_trace_held() ||                 \
+	 rcu_read_lock_bh_held())
 struct bpf_local_storage_map_bucket {
 	struct hlist_head list;
 	raw_spinlock_t lock;
@@ -162,4 +165,6 @@ struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 			 void *value, u64 map_flags);
 
+void bpf_local_storage_free_rcu(struct rcu_head *rcu);
+
 #endif /* _BPF_LOCAL_STORAGE_H */
-- 
cgit v1.2.3


From 3b80b73a4b3de38f72cd79e1a157449917f2bcb5 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 29 Dec 2021 17:27:41 -0800
Subject: net: Add includes masked by netdevice.h including uapi/bpf.h

Add missing includes unmasked by the subsequent change.

Mostly network drivers missing an include for XDP_PACKET_HEADROOM.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211230012742.770642-2-kuba@kernel.org
---
 include/net/ip6_fib.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 83b8070d1cc9..a9a4ccc0cdb5 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -20,6 +20,7 @@
 #include <net/inetpeer.h>
 #include <net/fib_notifier.h>
 #include <linux/indirect_call_wrapper.h>
+#include <uapi/linux/bpf.h>
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 #define FIB6_TABLE_HASHSZ 256
-- 
cgit v1.2.3


From aebb51ec3db2a871d74b4afad3f9914812acf120 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 29 Dec 2021 17:27:42 -0800
Subject: bpf: Invert the dependency between bpf-netns.h and netns/bpf.h

netns/bpf.h gets included by netdevice.h (thru net_namespace.h)
which in turn gets included in a lot of places. We should keep
netns/bpf.h as light-weight as possible.

bpf-netns.h seems to contain more implementation details than
deserves to be included in a netns header. It needs to pull in
uapi/bpf.h to get various enum types.

Move enum netns_bpf_attach_type to netns/bpf.h and invert the
dependency. This makes netns/bpf.h fit the mold of a struct
definition header more clearly, and drops the number of objects
rebuilt when uapi/bpf.h is touched from 7.7k to 1.1k.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211230012742.770642-3-kuba@kernel.org
---
 include/linux/bpf-netns.h | 8 +-------
 include/net/netns/bpf.h   | 9 ++++++++-
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf-netns.h b/include/linux/bpf-netns.h
index 722f799c1a2e..413cfa5e4b07 100644
--- a/include/linux/bpf-netns.h
+++ b/include/linux/bpf-netns.h
@@ -3,15 +3,9 @@
 #define _BPF_NETNS_H
 
 #include <linux/mutex.h>
+#include <net/netns/bpf.h>
 #include <uapi/linux/bpf.h>
 
-enum netns_bpf_attach_type {
-	NETNS_BPF_INVALID = -1,
-	NETNS_BPF_FLOW_DISSECTOR = 0,
-	NETNS_BPF_SK_LOOKUP,
-	MAX_NETNS_BPF_ATTACH_TYPE
-};
-
 static inline enum netns_bpf_attach_type
 to_netns_bpf_attach_type(enum bpf_attach_type attach_type)
 {
diff --git a/include/net/netns/bpf.h b/include/net/netns/bpf.h
index 0ca6a1b87185..2c01a278d1eb 100644
--- a/include/net/netns/bpf.h
+++ b/include/net/netns/bpf.h
@@ -6,11 +6,18 @@
 #ifndef __NETNS_BPF_H__
 #define __NETNS_BPF_H__
 
-#include <linux/bpf-netns.h>
+#include <linux/list.h>
 
 struct bpf_prog;
 struct bpf_prog_array;
 
+enum netns_bpf_attach_type {
+	NETNS_BPF_INVALID = -1,
+	NETNS_BPF_FLOW_DISSECTOR = 0,
+	NETNS_BPF_SK_LOOKUP,
+	MAX_NETNS_BPF_ATTACH_TYPE
+};
+
 struct netns_bpf {
 	/* Array of programs to run compiled from progs or links */
 	struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE];
-- 
cgit v1.2.3


From 882c982dada4d53079c56de94ccbce1e21cc675f Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Thu, 23 Dec 2021 11:16:17 +0300
Subject: acpi: Store CRC-32 hash of the _PLD in struct acpi_device

Storing CRC-32 hash of the Physical Location of Device
object (_PLD) with devices that have it. The hash is stored
to a new struct acpi_device member "pld_crc".

The hash makes it easier to find devices that share a
location, as there is no need to evaluate the entire object
every time. Knowledge about devices that share a location
can be used in device drivers that need to know the
connections to other components inside a system. USB3 ports
will for example always share their location with a USB2
port.

Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20211223081620.45479-3-heikki.krogerus@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/acpi/acpi_bus.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 480f9207a4c6..3a1280649ddf 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -358,6 +358,7 @@ struct acpi_gpio_mapping;
 
 /* Device */
 struct acpi_device {
+	u32 pld_crc;
 	int device_type;
 	acpi_handle handle;		/* no handle for fixed hardware */
 	struct fwnode_handle fwnode;
-- 
cgit v1.2.3


From 730b49aac426e1e8016d3c2dd6b407e500423821 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Thu, 23 Dec 2021 11:24:22 +0300
Subject: usb: typec: port-mapper: Convert to the component framework

Instead of trying to keep track of the connections to the
USB Type-C connectors separately, letting the component
framework take care of that.

From now on every USB Type-C connector will register itself
as "aggregate" - component master - and anything that can be
connected to it inside the system can then simply register
itself as a generic component.

The matching of the components and the connector shall rely
on ACPI _PLD initially. Before registering itself as the
aggregate, the connector will find all other ACPI devices
that have matching _PLD crc hash with it (matching value in
the pld_crc member of struct acpi_device), and add a
component match entry for each one of them. Because only
ACPI is supported for now, the driver shall only be build
when ACPI is supported.

This removes the need for the custom API that the driver
exposed. The components and the connector can therefore
exist completely independently of each other. The order in
which they are registered, as well as are they modules or
not, is now irrelevant.

Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20211223082422.45637-1-heikki.krogerus@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/typec.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h
index e2e44bb1dad8..7ba45a97eeae 100644
--- a/include/linux/usb/typec.h
+++ b/include/linux/usb/typec.h
@@ -305,16 +305,4 @@ void typec_partner_set_svdm_version(struct typec_partner *partner,
 				    enum usb_pd_svdm_ver svdm_version);
 int typec_get_negotiated_svdm_version(struct typec_port *port);
 
-#if IS_REACHABLE(CONFIG_TYPEC)
-int typec_link_port(struct device *port);
-void typec_unlink_port(struct device *port);
-#else
-static inline int typec_link_port(struct device *port)
-{
-	return 0;
-}
-
-static inline void typec_unlink_port(struct device *port) { }
-#endif
-
 #endif /* __LINUX_USB_TYPEC_H */
-- 
cgit v1.2.3


From 510a0bdb2bfcff8d7be822c72adc3add7a97d559 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Thu, 23 Dec 2021 11:24:32 +0300
Subject: usb: Remove usb_for_each_port()

There are no more users for the function.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20211223082432.45653-1-heikki.krogerus@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 7ccaa76a9a96..200b7b79acb5 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -875,15 +875,6 @@ extern struct usb_host_interface *usb_find_alt_setting(
 		unsigned int iface_num,
 		unsigned int alt_num);
 
-#if IS_REACHABLE(CONFIG_USB)
-int usb_for_each_port(void *data, int (*fn)(struct device *, void *));
-#else
-static inline int usb_for_each_port(void *data, int (*fn)(struct device *, void *))
-{
-	return 0;
-}
-#endif
-
 /* port claiming functions */
 int usb_hub_claim_port(struct usb_device *hdev, unsigned port1,
 		struct usb_dev_state *owner);
-- 
cgit v1.2.3


From b4a29b94804c4774f22555651296b838df6ec0e4 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Tue, 28 Dec 2021 18:22:00 +0100
Subject: serial: 8250: Move Alpha-specific quirk out of the core

struct uart_8250_port contains mcr_mask and mcr_force members whose
sole purpose is to work around an Alpha-specific quirk.  This code
doesn't belong in the core where it is executed by everyone else,
so move it to a proper ->set_mctrl callback which is used on the
affected Alpha machine only.

The quirk was introduced in January 1995:
https://git.kernel.org/pub/scm/linux/kernel/git/history/history.git/diff/drivers/char/serial.c?h=1.1.83

The members in struct uart_8250_port were added in 2002:
https://git.kernel.org/history/history/c/4524aad27854

The quirk applies to non-PCI Alphas and arch/alpha/Kconfig specifies
"select FORCE_PCI if !ALPHA_JENSEN".  So apparently the only affected
machine is the EISA-based Jensen that Linus was working on back then:
https://lore.kernel.org/all/CAHk-=wj1JWZ3sCrGz16nxEj7=0O+srMg6Ah3iPTDXSPKEws_SA@mail.gmail.com/

Up until now the quirk is not applied unless CONFIG_PCI is disabled.
If users forget to do that or run a generic Alpha kernel, the serial
ports aren't usable on Jensen.  Avoid by confining the quirk to
CONFIG_ALPHA_JENSEN instead of !CONFIG_PCI.  On generic Alpha kernels,
auto-detect at runtime whether the quirk needs to be applied.

Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Ulrich Teichert <krypton@ulrich-teichert.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Link: https://lore.kernel.org/r/b83d069cb516549b8a5420e097bb6bdd806f36fc.1640695609.git.lukas@wunner.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_8250.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 5db211f43b29..ff84a3ed10ea 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -104,8 +104,6 @@ struct uart_8250_port {
 	unsigned char		ier;
 	unsigned char		lcr;
 	unsigned char		mcr;
-	unsigned char		mcr_mask;	/* mask of user bits */
-	unsigned char		mcr_force;	/* mask of forced bits */
 	unsigned char		cur_iotype;	/* Running I/O type */
 	unsigned int		rpm_tx_active;
 	unsigned char		canary;		/* non-zero during system sleep
-- 
cgit v1.2.3


From d8e9a406a931f687945703a4bac45042eb81ce92 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm+renesas@opensource.se>
Date: Sun, 12 Dec 2021 22:21:28 +0900
Subject: serdev: BREAK/FRAME/PARITY/OVERRUN notification prototype V2

Allow serdev device drivers get notified by hardware errors such as BREAK,
FRAME, PARITY and OVERRUN.

With this patch, in the event of an error detected in the UART device driver
the serdev_device_driver will get the newly introduced ->error() callback
invoked if serdev_device_set_error_mask() has previously been used to enable
the type of error. The errors are taken straight from the TTY layer and fed
into the serdev_device_driver after filtering out only enabled errors.

Without this patch the hardware errors never reach the serdev_device_driver.

Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
Link: https://lore.kernel.org/r/163931528842.27756.3665040315954968747.sendpatchset@octo
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serdev.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index 3368c261ab62..0d0b22fc7e37 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -19,12 +19,15 @@ struct serdev_device;
 
 /**
  * struct serdev_device_ops - Callback operations for a serdev device
+ * @error:		Function called with errors received from device;
+ *			may sleep.
  * @receive_buf:	Function called with data received from device;
  *			returns number of bytes accepted; may sleep.
  * @write_wakeup:	Function called when ready to transmit more data; must
  *			not sleep.
  */
 struct serdev_device_ops {
+	void (*error)(struct serdev_device *, unsigned long);
 	int (*receive_buf)(struct serdev_device *, const unsigned char *, size_t);
 	void (*write_wakeup)(struct serdev_device *);
 };
@@ -76,6 +79,11 @@ enum serdev_parity {
 	SERDEV_PARITY_ODD,
 };
 
+#define SERDEV_ERROR_BREAK 0
+#define SERDEV_ERROR_FRAME 1
+#define SERDEV_ERROR_PARITY 2
+#define SERDEV_ERROR_OVERRUN 3
+
 /*
  * serdev controller structures
  */
@@ -85,6 +93,7 @@ struct serdev_controller_ops {
 	int (*write_room)(struct serdev_controller *);
 	int (*open)(struct serdev_controller *);
 	void (*close)(struct serdev_controller *);
+	void (*set_error_mask)(struct serdev_controller *, unsigned long);
 	void (*set_flow_control)(struct serdev_controller *, bool);
 	int (*set_parity)(struct serdev_controller *, enum serdev_parity);
 	unsigned int (*set_baudrate)(struct serdev_controller *, unsigned int);
@@ -190,12 +199,24 @@ static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl,
 	return serdev->ops->receive_buf(serdev, data, count);
 }
 
+static inline void serdev_controller_error(struct serdev_controller *ctrl,
+					   unsigned long errors)
+{
+	struct serdev_device *serdev = ctrl->serdev;
+
+	if (!serdev || !serdev->ops->error)
+		return;
+
+	serdev->ops->error(serdev, errors);
+}
+
 #if IS_ENABLED(CONFIG_SERIAL_DEV_BUS)
 
 int serdev_device_open(struct serdev_device *);
 void serdev_device_close(struct serdev_device *);
 int devm_serdev_device_open(struct device *, struct serdev_device *);
 unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int);
+void serdev_device_set_error_mask(struct serdev_device *, unsigned long);
 void serdev_device_set_flow_control(struct serdev_device *, bool);
 int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t);
 void serdev_device_wait_until_sent(struct serdev_device *, long);
@@ -238,6 +259,7 @@ static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev
 {
 	return 0;
 }
+static inline void serdev_device_set_error_mask(struct serdev_device *sdev, unsigned long mask) {}
 static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {}
 static inline int serdev_device_write_buf(struct serdev_device *serdev,
 					  const unsigned char *buf,
-- 
cgit v1.2.3


From 5207fb2f311b0c45a9abfa1c84b7a7b657ffa550 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 30 Dec 2021 16:02:41 +0100
Subject: counter: Provide a wrapper to access device private data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For now this just wraps accessing struct counter_device::priv. However
this is about to change and converting drivers to this helper
individually makes fixing device lifetime issues result in easier to
review patches.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20211230150300.72196-5-u.kleine-koenig@pengutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/counter.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/counter.h b/include/linux/counter.h
index dfbde2808998..627f1757f6bb 100644
--- a/include/linux/counter.h
+++ b/include/linux/counter.h
@@ -329,6 +329,8 @@ struct counter_device {
 	struct mutex ops_exist_lock;
 };
 
+void *counter_priv(const struct counter_device *const counter);
+
 int counter_register(struct counter_device *const counter);
 void counter_unregister(struct counter_device *const counter);
 int devm_counter_register(struct device *dev,
-- 
cgit v1.2.3


From c18e2760308e30f007fa24b558b87c39d7e86ff1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 30 Dec 2021 16:02:50 +0100
Subject: counter: Provide alternative counter registration functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current implementation gets device lifetime tracking wrong. The
problem is that allocation of struct counter_device is controlled by the
individual drivers but this structure contains a struct device that
might have to live longer than a driver is bound. As a result a command
sequence like:

	{ sleep 5; echo bang; } > /dev/counter0 &
	sleep 1;
	echo 40000000.timer:counter > /sys/bus/platform/drivers/stm32-timer-counter/unbind

can keep a reference to the struct device and unbinding results in
freeing the memory occupied by this device resulting in an oops.

This commit provides two new functions (plus some helpers):
 - counter_alloc() to allocate a struct counter_device that is
   automatically freed once the embedded struct device is released
 - counter_add() to register such a device.

Note that this commit doesn't fix any issues, all drivers have to be
converted to these new functions to correct the lifetime problems.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20211230150300.72196-14-u.kleine-koenig@pengutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/counter.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/counter.h b/include/linux/counter.h
index 627f1757f6bb..ed8d5820f0d1 100644
--- a/include/linux/counter.h
+++ b/include/linux/counter.h
@@ -327,14 +327,29 @@ struct counter_device {
 	spinlock_t events_in_lock;
 	struct mutex events_out_lock;
 	struct mutex ops_exist_lock;
+
+	/*
+	 * This can go away once all drivers are converted to
+	 * counter_alloc()/counter_add().
+	 */
+	bool legacy_device;
 };
 
 void *counter_priv(const struct counter_device *const counter);
 
 int counter_register(struct counter_device *const counter);
+
+struct counter_device *counter_alloc(size_t sizeof_priv);
+void counter_put(struct counter_device *const counter);
+int counter_add(struct counter_device *const counter);
+
 void counter_unregister(struct counter_device *const counter);
 int devm_counter_register(struct device *dev,
 			  struct counter_device *const counter);
+struct counter_device *devm_counter_alloc(struct device *dev,
+					  size_t sizeof_priv);
+int devm_counter_add(struct device *dev,
+		     struct counter_device *const counter);
 void counter_push_event(struct counter_device *const counter, const u8 event,
 			const u8 channel);
 
-- 
cgit v1.2.3


From f2ee4759fb700b32a1bd830960fe86bf6bdfd0ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 30 Dec 2021 16:03:00 +0100
Subject: counter: remove old and now unused registration API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Usage of counter_register() yields issues in device lifetime tracking. All
drivers were converted to the new API, so the old one can go away.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20211230150300.72196-24-u.kleine-koenig@pengutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/counter.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/counter.h b/include/linux/counter.h
index ed8d5820f0d1..1fe17f5adb09 100644
--- a/include/linux/counter.h
+++ b/include/linux/counter.h
@@ -314,8 +314,6 @@ struct counter_device {
 	struct counter_comp *ext;
 	size_t num_ext;
 
-	void *priv;
-
 	struct device dev;
 	struct cdev chrdev;
 	struct list_head events_list;
@@ -327,25 +325,15 @@ struct counter_device {
 	spinlock_t events_in_lock;
 	struct mutex events_out_lock;
 	struct mutex ops_exist_lock;
-
-	/*
-	 * This can go away once all drivers are converted to
-	 * counter_alloc()/counter_add().
-	 */
-	bool legacy_device;
 };
 
 void *counter_priv(const struct counter_device *const counter);
 
-int counter_register(struct counter_device *const counter);
-
 struct counter_device *counter_alloc(size_t sizeof_priv);
 void counter_put(struct counter_device *const counter);
 int counter_add(struct counter_device *const counter);
 
 void counter_unregister(struct counter_device *const counter);
-int devm_counter_register(struct device *dev,
-			  struct counter_device *const counter);
 struct counter_device *devm_counter_alloc(struct device *dev,
 					  size_t sizeof_priv);
 int devm_counter_add(struct device *dev,
-- 
cgit v1.2.3


From fb0b00af04d083770d6e2762b2838357519f7d2d Mon Sep 17 00:00:00 2001
From: Jinzhou Su <Jinzhou.Su@amd.com>
Date: Fri, 24 Dec 2021 09:04:59 +0800
Subject: ACPI: CPPC: Add CPPC enable register function

Add a new function to enable CPPC feature. This function
will write Continuous Performance Control package
EnableRegister field on the processor.

CPPC EnableRegister register described in section 8.4.7.1 of ACPI 6.4:
This element is optional. If supported, contains a resource descriptor
with a single Register() descriptor that describes a register to which
OSPM writes a One to enable CPPC on this processor. Before this register
is set, the processor will be controlled by legacy mechanisms (ACPI
Pstates, firmware, etc.).

This register will be used for AMD processors to enable AMD P-State
function instead of legacy ACPI P-States.

Signed-off-by: Jinzhou Su <Jinzhou.Su@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/cppc_acpi.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index bc159a9b4a73..92b7ea8d8f5e 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -138,6 +138,7 @@ extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf);
 extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf);
 extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
 extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
+extern int cppc_set_enable(int cpu, bool enable);
 extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps);
 extern bool acpi_cpc_valid(void);
 extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data);
@@ -162,6 +163,10 @@ static inline int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
 {
 	return -ENOTSUPP;
 }
+static inline int cppc_set_enable(int cpu, bool enable)
+{
+	return -ENOTSUPP;
+}
 static inline int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps)
 {
 	return -ENOTSUPP;
-- 
cgit v1.2.3


From 35f9e773bb883a1be87410570f92c8c438e0478b Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 30 Dec 2021 15:17:20 +0100
Subject: ACPI / x86: Add acpi_quirk_skip_[i2c_client|serdev]_enumeration()
 helpers

x86 ACPI boards which ship with only Android as their factory image usually
declare a whole bunch of bogus I2C devs in their ACPI tables and sometimes
there are issues with serdev devices on these boards too, e.g. the resource
points to the wrong serdev_controller.

Instantiating I2C / serdev devs for these bogus devs causes various issues,
e.g. GPIO/IRQ resource conflicts because sometimes drivers do bind to them.
The Android x86 kernel fork shipped on these devices has some special code
to remove the bogus I2C clients (and serdevs are ignored completely).

Introduce acpi_quirk_skip_i2c_client_enumeration() and
acpi_quirk_skip_serdev_enumeration() helpers. Which can be used by the I2C/
serdev code to skip instantiating any I2C or serdev devs on broken boards.

These 2 helpers are added to drivers/acpi/x86/utils.c so that the DMI table
can be shared between the I2C and serdev code.

Note these boards typically do actually have I2C and serdev devices, just
different ones then the ones described in their DSDT. The devices which
are actually present are manually instantiated by the
drivers/platform/x86/x86-android-tablets.c kernel module.

The new helpers are only build if CONFIG_X86_ANDROID_TABLETS is enabled,
otherwise they are empty stubs to not unnecessarily grow the kernel size.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpi_bus.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 17f700c9a4f9..88f21780447b 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -622,6 +622,22 @@ static inline bool acpi_device_always_present(struct acpi_device *adev)
 }
 #endif
 
+#if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS)
+bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev);
+int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip);
+#else
+static inline bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev)
+{
+	return false;
+}
+static inline int
+acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip)
+{
+	*skip = false;
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_PM
 void acpi_pm_wakeup_event(struct device *dev);
 acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev,
-- 
cgit v1.2.3


From d6c6d0bb2cb3af91c9c1546af7cdf4770d0df443 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Wed, 29 Dec 2021 12:36:20 +0100
Subject: net: remove references to CONFIG_IRDA in network header files

Commit d64c2a76123f ("staging: irda: remove the irda network stack and
drivers") removes the config IRDA.

Remove the remaining references to this non-existing config in the network
header files.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Link: https://lore.kernel.org/r/20211229113620.19368-1-lukas.bulwahn@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/atalk.h     | 2 +-
 include/linux/netdevice.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index f6034ba774be..a55bfc6567d0 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -113,7 +113,7 @@ extern int aarp_proto_init(void);
 /* Inter module exports */
 
 /* Give a device find its atif control structure */
-#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK)
+#if IS_ENABLED(CONFIG_ATALK)
 static inline struct atalk_iface *atalk_find_dev(struct net_device *dev)
 {
 	return dev->atalk_ptr;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2684bdb6defa..6f99c8f51b60 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2098,7 +2098,7 @@ struct net_device {
 #if IS_ENABLED(CONFIG_TIPC)
 	struct tipc_bearer __rcu *tipc_ptr;
 #endif
-#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK)
+#if IS_ENABLED(CONFIG_ATALK)
 	void 			*atalk_ptr;
 #endif
 	struct in_device __rcu	*ip_ptr;
-- 
cgit v1.2.3


From c3fb0e280b4cdbf382f59eb6b276e4c6047bc219 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Thu, 18 Nov 2021 02:32:37 +0200
Subject: net/mlx5: DR, Fix lower case macro prefix "mlx5_" to "MLX5_"

Macros prefix should be capital letters - fix the prefix in
mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED.

Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
---
 include/linux/mlx5/mlx5_ifc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index e9db12aae8f9..18b816b41545 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1291,7 +1291,7 @@ enum {
 enum {
 	MLX5_FLEX_PARSER_GENEVE_ENABLED		= 1 << 3,
 	MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED	= 1 << 4,
-	mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED	= 1 << 5,
+	MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED	= 1 << 5,
 	MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED	= 1 << 7,
 	MLX5_FLEX_PARSER_ICMP_V4_ENABLED	= 1 << 8,
 	MLX5_FLEX_PARSER_ICMP_V6_ENABLED	= 1 << 9,
-- 
cgit v1.2.3


From 0f2a6c3b9219bdf497750258cd2ad513f0056b42 Mon Sep 17 00:00:00 2001
From: Muhammad Sammar <muhammads@nvidia.com>
Date: Sun, 5 Sep 2021 15:16:21 +0300
Subject: net/mlx5: Add misc5 flow table match parameters

Add support for misc5 match parameter as per HW spec, this will allow
matching on tunnel_header fields.

Signed-off-by: Muhammad Sammar <muhammads@nvidia.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
---
 include/linux/mlx5/device.h              |  1 +
 include/linux/mlx5/mlx5_ifc.h            | 25 ++++++++++++++++++++++++-
 include/uapi/rdma/mlx5_user_ioctl_cmds.h |  2 +-
 3 files changed, 26 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 9c25edfd59a6..604b85dd770a 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1117,6 +1117,7 @@ enum {
 	MLX5_MATCH_MISC_PARAMETERS_2	= 1 << 3,
 	MLX5_MATCH_MISC_PARAMETERS_3	= 1 << 4,
 	MLX5_MATCH_MISC_PARAMETERS_4	= 1 << 5,
+	MLX5_MATCH_MISC_PARAMETERS_5	= 1 << 6,
 };
 
 enum {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 18b816b41545..c74c5e147cb9 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -670,6 +670,26 @@ struct mlx5_ifc_fte_match_set_misc4_bits {
 	u8         reserved_at_100[0x100];
 };
 
+struct mlx5_ifc_fte_match_set_misc5_bits {
+	u8         macsec_tag_0[0x20];
+
+	u8         macsec_tag_1[0x20];
+
+	u8         macsec_tag_2[0x20];
+
+	u8         macsec_tag_3[0x20];
+
+	u8         tunnel_header_0[0x20];
+
+	u8         tunnel_header_1[0x20];
+
+	u8         tunnel_header_2[0x20];
+
+	u8         tunnel_header_3[0x20];
+
+	u8         reserved_at_100[0x100];
+};
+
 struct mlx5_ifc_cmd_pas_bits {
 	u8         pa_h[0x20];
 
@@ -1839,7 +1859,9 @@ struct mlx5_ifc_fte_match_param_bits {
 
 	struct mlx5_ifc_fte_match_set_misc4_bits misc_parameters_4;
 
-	u8         reserved_at_c00[0x400];
+	struct mlx5_ifc_fte_match_set_misc5_bits misc_parameters_5;
+
+	u8         reserved_at_e00[0x200];
 };
 
 enum {
@@ -5977,6 +5999,7 @@ enum {
 	MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2 = 0x3,
 	MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_3 = 0x4,
 	MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_4 = 0x5,
+	MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_5 = 0x6,
 };
 
 struct mlx5_ifc_query_flow_group_out_bits {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index ca2372864b70..e539c84d63f1 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -252,7 +252,7 @@ enum mlx5_ib_device_query_context_attrs {
 	MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX = (1U << UVERBS_ID_NS_SHIFT),
 };
 
-#define MLX5_IB_DW_MATCH_PARAM 0x90
+#define MLX5_IB_DW_MATCH_PARAM 0xA0
 
 struct mlx5_ib_match_params {
 	__u32	match_params[MLX5_IB_DW_MATCH_PARAM];
-- 
cgit v1.2.3


From f59464e257bdbd4df6df9a4505d7858a0baf6cf7 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Mon, 8 Nov 2021 02:42:50 +0200
Subject: net/mlx5: DR, Add support for matching on geneve_tlv_option_0_exist
 field

Match on geneve_tlv_option_0_exist field on devices that support STEv1.

Signed-off-by: Muhammad Sammar <muhammads@nvidia.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
---
 include/linux/mlx5/mlx5_ifc.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index c74c5e147cb9..deaa0f71213f 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -372,7 +372,8 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
 	u8         reserved_at_37[0x9];
 
 	u8         geneve_tlv_option_0_data[0x1];
-	u8         reserved_at_41[0x4];
+	u8         geneve_tlv_option_0_exist[0x1];
+	u8         reserved_at_42[0x3];
 	u8         outer_first_mpls_over_udp[0x4];
 	u8         outer_first_mpls_over_gre[0x4];
 	u8         inner_first_mpls[0x4];
@@ -551,7 +552,8 @@ struct mlx5_ifc_fte_match_set_misc_bits {
 	u8         bth_opcode[0x8];
 
 	u8         geneve_vni[0x18];
-	u8         reserved_at_d8[0x7];
+	u8         reserved_at_d8[0x6];
+	u8         geneve_tlv_option_0_exist[0x1];
 	u8         geneve_oam[0x1];
 
 	u8         reserved_at_e0[0xc];
-- 
cgit v1.2.3


From 4ff725e1d4adfd313bc9767523fc8d6e90d50f9c Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Tue, 23 Nov 2021 02:11:12 +0200
Subject: net/mlx5: DR, Ignore modify TTL if device doesn't support it

When modifying TTL, packet's csum has to be recalculated.
Due to HW issue in ConnectX-5, csum recalculation for modify TTL
is supported through a work-around that is specifically enabled
by configuration.
If the work-around isn't enabled, ignore the modify TTL action
rather than adding an unsupported action.

Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
---
 include/linux/mlx5/mlx5_ifc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index deaa0f71213f..598ac3bcc901 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -833,7 +833,7 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits {
 	u8      fdb_to_vport_reg_c_id[0x8];
 	u8      reserved_at_8[0xd];
 	u8      fdb_modify_header_fwd_to_table[0x1];
-	u8      reserved_at_16[0x1];
+	u8      fdb_ipv4_ttl_modify[0x1];
 	u8      flow_source[0x1];
 	u8      reserved_at_18[0x2];
 	u8      multi_fdb_encap[0x1];
-- 
cgit v1.2.3


From 99a507a8ea28542ec196e2dd80096708e2482735 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 31 Dec 2021 13:42:30 +0100
Subject: Revert "serdev: BREAK/FRAME/PARITY/OVERRUN notification prototype V2"

This reverts commit d8e9a406a931f687945703a4bac45042eb81ce92.

It needs some future changes as pointed out by Johan and is not ready to
be merged just yet.

Reported-by: Johan Hovold <johan@kernel.org>
Cc: Magnus Damm <damm+renesas@opensource.se>
Link: https://lore.kernel.org/r/Yc7oZ/1tu95Z4wPS@hovoldconsulting.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serdev.h | 22 ----------------------
 1 file changed, 22 deletions(-)

(limited to 'include')

diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index 0d0b22fc7e37..3368c261ab62 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -19,15 +19,12 @@ struct serdev_device;
 
 /**
  * struct serdev_device_ops - Callback operations for a serdev device
- * @error:		Function called with errors received from device;
- *			may sleep.
  * @receive_buf:	Function called with data received from device;
  *			returns number of bytes accepted; may sleep.
  * @write_wakeup:	Function called when ready to transmit more data; must
  *			not sleep.
  */
 struct serdev_device_ops {
-	void (*error)(struct serdev_device *, unsigned long);
 	int (*receive_buf)(struct serdev_device *, const unsigned char *, size_t);
 	void (*write_wakeup)(struct serdev_device *);
 };
@@ -79,11 +76,6 @@ enum serdev_parity {
 	SERDEV_PARITY_ODD,
 };
 
-#define SERDEV_ERROR_BREAK 0
-#define SERDEV_ERROR_FRAME 1
-#define SERDEV_ERROR_PARITY 2
-#define SERDEV_ERROR_OVERRUN 3
-
 /*
  * serdev controller structures
  */
@@ -93,7 +85,6 @@ struct serdev_controller_ops {
 	int (*write_room)(struct serdev_controller *);
 	int (*open)(struct serdev_controller *);
 	void (*close)(struct serdev_controller *);
-	void (*set_error_mask)(struct serdev_controller *, unsigned long);
 	void (*set_flow_control)(struct serdev_controller *, bool);
 	int (*set_parity)(struct serdev_controller *, enum serdev_parity);
 	unsigned int (*set_baudrate)(struct serdev_controller *, unsigned int);
@@ -199,24 +190,12 @@ static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl,
 	return serdev->ops->receive_buf(serdev, data, count);
 }
 
-static inline void serdev_controller_error(struct serdev_controller *ctrl,
-					   unsigned long errors)
-{
-	struct serdev_device *serdev = ctrl->serdev;
-
-	if (!serdev || !serdev->ops->error)
-		return;
-
-	serdev->ops->error(serdev, errors);
-}
-
 #if IS_ENABLED(CONFIG_SERIAL_DEV_BUS)
 
 int serdev_device_open(struct serdev_device *);
 void serdev_device_close(struct serdev_device *);
 int devm_serdev_device_open(struct device *, struct serdev_device *);
 unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int);
-void serdev_device_set_error_mask(struct serdev_device *, unsigned long);
 void serdev_device_set_flow_control(struct serdev_device *, bool);
 int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t);
 void serdev_device_wait_until_sent(struct serdev_device *, long);
@@ -259,7 +238,6 @@ static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev
 {
 	return 0;
 }
-static inline void serdev_device_set_error_mask(struct serdev_device *sdev, unsigned long mask) {}
 static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {}
 static inline int serdev_device_write_buf(struct serdev_device *serdev,
 					  const unsigned char *buf,
-- 
cgit v1.2.3


From a87d42227cf5614fe0040ddd1fe642c54298b42c Mon Sep 17 00:00:00 2001
From: Lucas Tanure <tanureal@opensource.cirrus.com>
Date: Fri, 17 Dec 2021 11:56:59 +0000
Subject: ASoC: cs35l41: Convert tables to shared source code

To support CS35L41 in HDA systems the HDA driver
for CS35L41 would have to duplicate some functions
that already exist on ASoC driver
So instead of duplicate the code, use the new lib
source as a shared resource for both ASoC and HDA

Also, change the way CONFIG_SND_SOC_CS35L41 is
selected, as reported by Intel Kernel test robot,
it is possible to build SND_SOC_CS35L41_SPI/I2C
without the main driver, which would lead to build
failures.

Signed-off-by: Lucas Tanure <tanureal@opensource.cirrus.com>
Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/r/20211217115708.882525-2-tanureal@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/cs35l41.h | 733 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 733 insertions(+)

(limited to 'include')

diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h
index 1f1e3c6c9be1..aac3ffb9bc89 100644
--- a/include/sound/cs35l41.h
+++ b/include/sound/cs35l41.h
@@ -10,6 +10,721 @@
 #ifndef __CS35L41_H
 #define __CS35L41_H
 
+#include <linux/regmap.h>
+
+#define CS35L41_FIRSTREG		0x00000000
+#define CS35L41_LASTREG			0x03804FE8
+#define CS35L41_DEVID			0x00000000
+#define CS35L41_REVID			0x00000004
+#define CS35L41_FABID			0x00000008
+#define CS35L41_RELID			0x0000000C
+#define CS35L41_OTPID			0x00000010
+#define CS35L41_SFT_RESET		0x00000020
+#define CS35L41_TEST_KEY_CTL		0x00000040
+#define CS35L41_USER_KEY_CTL		0x00000044
+#define CS35L41_OTP_MEM0		0x00000400
+#define CS35L41_OTP_MEM31		0x0000047C
+#define CS35L41_OTP_CTRL0		0x00000500
+#define CS35L41_OTP_CTRL1		0x00000504
+#define CS35L41_OTP_CTRL3		0x00000508
+#define CS35L41_OTP_CTRL4		0x0000050C
+#define CS35L41_OTP_CTRL5		0x00000510
+#define CS35L41_OTP_CTRL6		0x00000514
+#define CS35L41_OTP_CTRL7		0x00000518
+#define CS35L41_OTP_CTRL8		0x0000051C
+#define CS35L41_PWR_CTRL1		0x00002014
+#define CS35L41_PWR_CTRL2		0x00002018
+#define CS35L41_PWR_CTRL3		0x0000201C
+#define CS35L41_CTRL_OVRRIDE		0x00002020
+#define CS35L41_AMP_OUT_MUTE		0x00002024
+#define CS35L41_PROTECT_REL_ERR_IGN	0x00002034
+#define CS35L41_GPIO_PAD_CONTROL	0x0000242C
+#define CS35L41_JTAG_CONTROL		0x00002438
+#define CS35L41_PLL_CLK_CTRL		0x00002C04
+#define CS35L41_DSP_CLK_CTRL		0x00002C08
+#define CS35L41_GLOBAL_CLK_CTRL		0x00002C0C
+#define CS35L41_DATA_FS_SEL		0x00002C10
+#define CS35L41_TST_FS_MON0		0x00002D10
+#define CS35L41_MDSYNC_EN		0x00003400
+#define CS35L41_MDSYNC_TX_ID		0x00003408
+#define CS35L41_MDSYNC_PWR_CTRL		0x0000340C
+#define CS35L41_MDSYNC_DATA_TX		0x00003410
+#define CS35L41_MDSYNC_TX_STATUS	0x00003414
+#define CS35L41_MDSYNC_DATA_RX		0x0000341C
+#define CS35L41_MDSYNC_RX_STATUS	0x00003420
+#define CS35L41_MDSYNC_ERR_STATUS	0x00003424
+#define CS35L41_MDSYNC_SYNC_PTE2	0x00003528
+#define CS35L41_MDSYNC_SYNC_PTE3	0x0000352C
+#define CS35L41_MDSYNC_SYNC_MSM_STATUS	0x0000353C
+#define CS35L41_BSTCVRT_VCTRL1		0x00003800
+#define CS35L41_BSTCVRT_VCTRL2		0x00003804
+#define CS35L41_BSTCVRT_PEAK_CUR	0x00003808
+#define CS35L41_BSTCVRT_SFT_RAMP	0x0000380C
+#define CS35L41_BSTCVRT_COEFF		0x00003810
+#define CS35L41_BSTCVRT_SLOPE_LBST	0x00003814
+#define CS35L41_BSTCVRT_SW_FREQ		0x00003818
+#define CS35L41_BSTCVRT_DCM_CTRL	0x0000381C
+#define CS35L41_BSTCVRT_DCM_MODE_FORCE	0x00003820
+#define CS35L41_BSTCVRT_OVERVOLT_CTRL	0x00003830
+#define CS35L41_VI_VOL_POL		0x00004000
+#define CS35L41_VIMON_SPKMON_RESYNC	0x00004100
+#define CS35L41_DTEMP_WARN_THLD		0x00004220
+#define CS35L41_DTEMP_CFG		0x00004224
+#define CS35L41_DTEMP_EN		0x00004308
+#define CS35L41_VPVBST_FS_SEL		0x00004400
+#define CS35L41_SP_ENABLES		0x00004800
+#define CS35L41_SP_RATE_CTRL		0x00004804
+#define CS35L41_SP_FORMAT		0x00004808
+#define CS35L41_SP_HIZ_CTRL		0x0000480C
+#define CS35L41_SP_FRAME_TX_SLOT	0x00004810
+#define CS35L41_SP_FRAME_RX_SLOT	0x00004820
+#define CS35L41_SP_TX_WL		0x00004830
+#define CS35L41_SP_RX_WL		0x00004840
+#define CS35L41_ASP_CONTROL4		0x00004854
+#define CS35L41_DAC_PCM1_SRC		0x00004C00
+#define CS35L41_ASP_TX1_SRC		0x00004C20
+#define CS35L41_ASP_TX2_SRC		0x00004C24
+#define CS35L41_ASP_TX3_SRC		0x00004C28
+#define CS35L41_ASP_TX4_SRC		0x00004C2C
+#define CS35L41_DSP1_RX1_SRC		0x00004C40
+#define CS35L41_DSP1_RX2_SRC		0x00004C44
+#define CS35L41_DSP1_RX3_SRC		0x00004C48
+#define CS35L41_DSP1_RX4_SRC		0x00004C4C
+#define CS35L41_DSP1_RX5_SRC		0x00004C50
+#define CS35L41_DSP1_RX6_SRC		0x00004C54
+#define CS35L41_DSP1_RX7_SRC		0x00004C58
+#define CS35L41_DSP1_RX8_SRC		0x00004C5C
+#define CS35L41_NGATE1_SRC		0x00004C60
+#define CS35L41_NGATE2_SRC		0x00004C64
+#define CS35L41_AMP_DIG_VOL_CTRL	0x00006000
+#define CS35L41_VPBR_CFG		0x00006404
+#define CS35L41_VBBR_CFG		0x00006408
+#define CS35L41_VPBR_STATUS		0x0000640C
+#define CS35L41_VBBR_STATUS		0x00006410
+#define CS35L41_OVERTEMP_CFG		0x00006414
+#define CS35L41_AMP_ERR_VOL		0x00006418
+#define CS35L41_VOL_STATUS_TO_DSP	0x00006450
+#define CS35L41_CLASSH_CFG		0x00006800
+#define CS35L41_WKFET_CFG		0x00006804
+#define CS35L41_NG_CFG			0x00006808
+#define CS35L41_AMP_GAIN_CTRL		0x00006C04
+#define CS35L41_DAC_MSM_CFG		0x00007400
+#define CS35L41_IRQ1_CFG		0x00010000
+#define CS35L41_IRQ1_STATUS		0x00010004
+#define CS35L41_IRQ1_STATUS1		0x00010010
+#define CS35L41_IRQ1_STATUS2		0x00010014
+#define CS35L41_IRQ1_STATUS3		0x00010018
+#define CS35L41_IRQ1_STATUS4		0x0001001C
+#define CS35L41_IRQ1_RAW_STATUS1	0x00010090
+#define CS35L41_IRQ1_RAW_STATUS2	0x00010094
+#define CS35L41_IRQ1_RAW_STATUS3	0x00010098
+#define CS35L41_IRQ1_RAW_STATUS4	0x0001009C
+#define CS35L41_IRQ1_MASK1		0x00010110
+#define CS35L41_IRQ1_MASK2		0x00010114
+#define CS35L41_IRQ1_MASK3		0x00010118
+#define CS35L41_IRQ1_MASK4		0x0001011C
+#define CS35L41_IRQ1_FRC1		0x00010190
+#define CS35L41_IRQ1_FRC2		0x00010194
+#define CS35L41_IRQ1_FRC3		0x00010198
+#define CS35L41_IRQ1_FRC4		0x0001019C
+#define CS35L41_IRQ1_EDGE1		0x00010210
+#define CS35L41_IRQ1_EDGE4		0x0001021C
+#define CS35L41_IRQ1_POL1		0x00010290
+#define CS35L41_IRQ1_POL2		0x00010294
+#define CS35L41_IRQ1_POL3		0x00010298
+#define CS35L41_IRQ1_POL4		0x0001029C
+#define CS35L41_IRQ1_DB3		0x00010318
+#define CS35L41_IRQ2_CFG		0x00010800
+#define CS35L41_IRQ2_STATUS		0x00010804
+#define CS35L41_IRQ2_STATUS1		0x00010810
+#define CS35L41_IRQ2_STATUS2		0x00010814
+#define CS35L41_IRQ2_STATUS3		0x00010818
+#define CS35L41_IRQ2_STATUS4		0x0001081C
+#define CS35L41_IRQ2_RAW_STATUS1	0x00010890
+#define CS35L41_IRQ2_RAW_STATUS2	0x00010894
+#define CS35L41_IRQ2_RAW_STATUS3	0x00010898
+#define CS35L41_IRQ2_RAW_STATUS4	0x0001089C
+#define CS35L41_IRQ2_MASK1		0x00010910
+#define CS35L41_IRQ2_MASK2		0x00010914
+#define CS35L41_IRQ2_MASK3		0x00010918
+#define CS35L41_IRQ2_MASK4		0x0001091C
+#define CS35L41_IRQ2_FRC1		0x00010990
+#define CS35L41_IRQ2_FRC2		0x00010994
+#define CS35L41_IRQ2_FRC3		0x00010998
+#define CS35L41_IRQ2_FRC4		0x0001099C
+#define CS35L41_IRQ2_EDGE1		0x00010A10
+#define CS35L41_IRQ2_EDGE4		0x00010A1C
+#define CS35L41_IRQ2_POL1		0x00010A90
+#define CS35L41_IRQ2_POL2		0x00010A94
+#define CS35L41_IRQ2_POL3		0x00010A98
+#define CS35L41_IRQ2_POL4		0x00010A9C
+#define CS35L41_IRQ2_DB3		0x00010B18
+#define CS35L41_GPIO_STATUS1		0x00011000
+#define CS35L41_GPIO1_CTRL1		0x00011008
+#define CS35L41_GPIO2_CTRL1		0x0001100C
+#define CS35L41_MIXER_NGATE_CFG		0x00012000
+#define CS35L41_MIXER_NGATE_CH1_CFG	0x00012004
+#define CS35L41_MIXER_NGATE_CH2_CFG	0x00012008
+#define CS35L41_DSP_MBOX_1		0x00013000
+#define CS35L41_DSP_MBOX_2		0x00013004
+#define CS35L41_DSP_MBOX_3		0x00013008
+#define CS35L41_DSP_MBOX_4		0x0001300C
+#define CS35L41_DSP_MBOX_5		0x00013010
+#define CS35L41_DSP_MBOX_6		0x00013014
+#define CS35L41_DSP_MBOX_7		0x00013018
+#define CS35L41_DSP_MBOX_8		0x0001301C
+#define CS35L41_DSP_VIRT1_MBOX_1	0x00013020
+#define CS35L41_DSP_VIRT1_MBOX_2	0x00013024
+#define CS35L41_DSP_VIRT1_MBOX_3	0x00013028
+#define CS35L41_DSP_VIRT1_MBOX_4	0x0001302C
+#define CS35L41_DSP_VIRT1_MBOX_5	0x00013030
+#define CS35L41_DSP_VIRT1_MBOX_6	0x00013034
+#define CS35L41_DSP_VIRT1_MBOX_7	0x00013038
+#define CS35L41_DSP_VIRT1_MBOX_8	0x0001303C
+#define CS35L41_DSP_VIRT2_MBOX_1	0x00013040
+#define CS35L41_DSP_VIRT2_MBOX_2	0x00013044
+#define CS35L41_DSP_VIRT2_MBOX_3	0x00013048
+#define CS35L41_DSP_VIRT2_MBOX_4	0x0001304C
+#define CS35L41_DSP_VIRT2_MBOX_5	0x00013050
+#define CS35L41_DSP_VIRT2_MBOX_6	0x00013054
+#define CS35L41_DSP_VIRT2_MBOX_7	0x00013058
+#define CS35L41_DSP_VIRT2_MBOX_8	0x0001305C
+#define CS35L41_CLOCK_DETECT_1		0x00014000
+#define CS35L41_TIMER1_CONTROL		0x00015000
+#define CS35L41_TIMER1_COUNT_PRESET	0x00015004
+#define CS35L41_TIMER1_START_STOP	0x0001500C
+#define CS35L41_TIMER1_STATUS		0x00015010
+#define CS35L41_TIMER1_COUNT_READBACK	0x00015014
+#define CS35L41_TIMER1_DSP_CLK_CFG	0x00015018
+#define CS35L41_TIMER1_DSP_CLK_STATUS	0x0001501C
+#define CS35L41_TIMER2_CONTROL		0x00015100
+#define CS35L41_TIMER2_COUNT_PRESET	0x00015104
+#define CS35L41_TIMER2_START_STOP	0x0001510C
+#define CS35L41_TIMER2_STATUS		0x00015110
+#define CS35L41_TIMER2_COUNT_READBACK	0x00015114
+#define CS35L41_TIMER2_DSP_CLK_CFG	0x00015118
+#define CS35L41_TIMER2_DSP_CLK_STATUS	0x0001511C
+#define CS35L41_DFT_JTAG_CONTROL	0x00016000
+#define CS35L41_DIE_STS1		0x00017040
+#define CS35L41_DIE_STS2		0x00017044
+#define CS35L41_TEMP_CAL1		0x00017048
+#define CS35L41_TEMP_CAL2		0x0001704C
+#define CS35L41_DSP1_XMEM_PACK_0	0x02000000
+#define CS35L41_DSP1_XMEM_PACK_3068	0x02002FF0
+#define CS35L41_DSP1_XMEM_UNPACK32_0	0x02400000
+#define CS35L41_DSP1_XMEM_UNPACK32_2046	0x02401FF8
+#define CS35L41_DSP1_TIMESTAMP_COUNT	0x025C0800
+#define CS35L41_DSP1_SYS_ID		0x025E0000
+#define CS35L41_DSP1_SYS_VERSION	0x025E0004
+#define CS35L41_DSP1_SYS_CORE_ID	0x025E0008
+#define CS35L41_DSP1_SYS_AHB_ADDR	0x025E000C
+#define CS35L41_DSP1_SYS_XSRAM_SIZE	0x025E0010
+#define CS35L41_DSP1_SYS_YSRAM_SIZE	0x025E0018
+#define CS35L41_DSP1_SYS_PSRAM_SIZE	0x025E0020
+#define CS35L41_DSP1_SYS_PM_BOOT_SIZE	0x025E0028
+#define CS35L41_DSP1_SYS_FEATURES	0x025E002C
+#define CS35L41_DSP1_SYS_FIR_FILTERS	0x025E0030
+#define CS35L41_DSP1_SYS_LMS_FILTERS	0x025E0034
+#define CS35L41_DSP1_SYS_XM_BANK_SIZE	0x025E0038
+#define CS35L41_DSP1_SYS_YM_BANK_SIZE	0x025E003C
+#define CS35L41_DSP1_SYS_PM_BANK_SIZE	0x025E0040
+#define CS35L41_DSP1_AHBM_WIN0_CTRL0	0x025E2000
+#define CS35L41_DSP1_AHBM_WIN0_CTRL1	0x025E2004
+#define CS35L41_DSP1_AHBM_WIN1_CTRL0	0x025E2008
+#define CS35L41_DSP1_AHBM_WIN1_CTRL1	0x025E200C
+#define CS35L41_DSP1_AHBM_WIN2_CTRL0	0x025E2010
+#define CS35L41_DSP1_AHBM_WIN2_CTRL1	0x025E2014
+#define CS35L41_DSP1_AHBM_WIN3_CTRL0	0x025E2018
+#define CS35L41_DSP1_AHBM_WIN3_CTRL1	0x025E201C
+#define CS35L41_DSP1_AHBM_WIN4_CTRL0	0x025E2020
+#define CS35L41_DSP1_AHBM_WIN4_CTRL1	0x025E2024
+#define CS35L41_DSP1_AHBM_WIN5_CTRL0	0x025E2028
+#define CS35L41_DSP1_AHBM_WIN5_CTRL1	0x025E202C
+#define CS35L41_DSP1_AHBM_WIN6_CTRL0	0x025E2030
+#define CS35L41_DSP1_AHBM_WIN6_CTRL1	0x025E2034
+#define CS35L41_DSP1_AHBM_WIN7_CTRL0	0x025E2038
+#define CS35L41_DSP1_AHBM_WIN7_CTRL1	0x025E203C
+#define CS35L41_DSP1_AHBM_WIN_DBG_CTRL0	0x025E2040
+#define CS35L41_DSP1_AHBM_WIN_DBG_CTRL1	0x025E2044
+#define CS35L41_DSP1_XMEM_UNPACK24_0	0x02800000
+#define CS35L41_DSP1_XMEM_UNPACK24_4093	0x02803FF4
+#define CS35L41_DSP1_CTRL_BASE		0x02B80000
+#define CS35L41_DSP1_CORE_SOFT_RESET	0x02B80010
+#define CS35L41_DSP1_DEBUG		0x02B80040
+#define CS35L41_DSP1_TIMER_CTRL		0x02B80048
+#define CS35L41_DSP1_STREAM_ARB_CTRL	0x02B80050
+#define CS35L41_DSP1_RX1_RATE		0x02B80080
+#define CS35L41_DSP1_RX2_RATE		0x02B80088
+#define CS35L41_DSP1_RX3_RATE		0x02B80090
+#define CS35L41_DSP1_RX4_RATE		0x02B80098
+#define CS35L41_DSP1_RX5_RATE		0x02B800A0
+#define CS35L41_DSP1_RX6_RATE		0x02B800A8
+#define CS35L41_DSP1_RX7_RATE		0x02B800B0
+#define CS35L41_DSP1_RX8_RATE		0x02B800B8
+#define CS35L41_DSP1_TX1_RATE		0x02B80280
+#define CS35L41_DSP1_TX2_RATE		0x02B80288
+#define CS35L41_DSP1_TX3_RATE		0x02B80290
+#define CS35L41_DSP1_TX4_RATE		0x02B80298
+#define CS35L41_DSP1_TX5_RATE		0x02B802A0
+#define CS35L41_DSP1_TX6_RATE		0x02B802A8
+#define CS35L41_DSP1_TX7_RATE		0x02B802B0
+#define CS35L41_DSP1_TX8_RATE		0x02B802B8
+#define CS35L41_DSP1_NMI_CTRL1		0x02B80480
+#define CS35L41_DSP1_NMI_CTRL2		0x02B80488
+#define CS35L41_DSP1_NMI_CTRL3		0x02B80490
+#define CS35L41_DSP1_NMI_CTRL4		0x02B80498
+#define CS35L41_DSP1_NMI_CTRL5		0x02B804A0
+#define CS35L41_DSP1_NMI_CTRL6		0x02B804A8
+#define CS35L41_DSP1_NMI_CTRL7		0x02B804B0
+#define CS35L41_DSP1_NMI_CTRL8		0x02B804B8
+#define CS35L41_DSP1_RESUME_CTRL	0x02B80500
+#define CS35L41_DSP1_IRQ1_CTRL		0x02B80508
+#define CS35L41_DSP1_IRQ2_CTRL		0x02B80510
+#define CS35L41_DSP1_IRQ3_CTRL		0x02B80518
+#define CS35L41_DSP1_IRQ4_CTRL		0x02B80520
+#define CS35L41_DSP1_IRQ5_CTRL		0x02B80528
+#define CS35L41_DSP1_IRQ6_CTRL		0x02B80530
+#define CS35L41_DSP1_IRQ7_CTRL		0x02B80538
+#define CS35L41_DSP1_IRQ8_CTRL		0x02B80540
+#define CS35L41_DSP1_IRQ9_CTRL		0x02B80548
+#define CS35L41_DSP1_IRQ10_CTRL		0x02B80550
+#define CS35L41_DSP1_IRQ11_CTRL		0x02B80558
+#define CS35L41_DSP1_IRQ12_CTRL		0x02B80560
+#define CS35L41_DSP1_IRQ13_CTRL		0x02B80568
+#define CS35L41_DSP1_IRQ14_CTRL		0x02B80570
+#define CS35L41_DSP1_IRQ15_CTRL		0x02B80578
+#define CS35L41_DSP1_IRQ16_CTRL		0x02B80580
+#define CS35L41_DSP1_IRQ17_CTRL		0x02B80588
+#define CS35L41_DSP1_IRQ18_CTRL		0x02B80590
+#define CS35L41_DSP1_IRQ19_CTRL		0x02B80598
+#define CS35L41_DSP1_IRQ20_CTRL		0x02B805A0
+#define CS35L41_DSP1_IRQ21_CTRL		0x02B805A8
+#define CS35L41_DSP1_IRQ22_CTRL		0x02B805B0
+#define CS35L41_DSP1_IRQ23_CTRL		0x02B805B8
+#define CS35L41_DSP1_SCRATCH1		0x02B805C0
+#define CS35L41_DSP1_SCRATCH2		0x02B805C8
+#define CS35L41_DSP1_SCRATCH3		0x02B805D0
+#define CS35L41_DSP1_SCRATCH4		0x02B805D8
+#define CS35L41_DSP1_CCM_CORE_CTRL	0x02BC1000
+#define CS35L41_DSP1_CCM_CLK_OVERRIDE	0x02BC1008
+#define CS35L41_DSP1_XM_MSTR_EN		0x02BC2000
+#define CS35L41_DSP1_XM_CORE_PRI	0x02BC2008
+#define CS35L41_DSP1_XM_AHB_PACK_PL_PRI	0x02BC2010
+#define CS35L41_DSP1_XM_AHB_UP_PL_PRI	0x02BC2018
+#define CS35L41_DSP1_XM_ACCEL_PL0_PRI	0x02BC2020
+#define CS35L41_DSP1_XM_NPL0_PRI	0x02BC2078
+#define CS35L41_DSP1_YM_MSTR_EN		0x02BC20C0
+#define CS35L41_DSP1_YM_CORE_PRI	0x02BC20C8
+#define CS35L41_DSP1_YM_AHB_PACK_PL_PRI	0x02BC20D0
+#define CS35L41_DSP1_YM_AHB_UP_PL_PRI	0x02BC20D8
+#define CS35L41_DSP1_YM_ACCEL_PL0_PRI	0x02BC20E0
+#define CS35L41_DSP1_YM_NPL0_PRI	0x02BC2138
+#define CS35L41_DSP1_PM_MSTR_EN		0x02BC2180
+#define CS35L41_DSP1_PM_PATCH0_ADDR	0x02BC2188
+#define CS35L41_DSP1_PM_PATCH0_EN	0x02BC218C
+#define CS35L41_DSP1_PM_PATCH0_DATA_LO	0x02BC2190
+#define CS35L41_DSP1_PM_PATCH0_DATA_HI	0x02BC2194
+#define CS35L41_DSP1_PM_PATCH1_ADDR	0x02BC2198
+#define CS35L41_DSP1_PM_PATCH1_EN	0x02BC219C
+#define CS35L41_DSP1_PM_PATCH1_DATA_LO	0x02BC21A0
+#define CS35L41_DSP1_PM_PATCH1_DATA_HI	0x02BC21A4
+#define CS35L41_DSP1_PM_PATCH2_ADDR	0x02BC21A8
+#define CS35L41_DSP1_PM_PATCH2_EN	0x02BC21AC
+#define CS35L41_DSP1_PM_PATCH2_DATA_LO	0x02BC21B0
+#define CS35L41_DSP1_PM_PATCH2_DATA_HI	0x02BC21B4
+#define CS35L41_DSP1_PM_PATCH3_ADDR	0x02BC21B8
+#define CS35L41_DSP1_PM_PATCH3_EN	0x02BC21BC
+#define CS35L41_DSP1_PM_PATCH3_DATA_LO	0x02BC21C0
+#define CS35L41_DSP1_PM_PATCH3_DATA_HI	0x02BC21C4
+#define CS35L41_DSP1_PM_PATCH4_ADDR	0x02BC21C8
+#define CS35L41_DSP1_PM_PATCH4_EN	0x02BC21CC
+#define CS35L41_DSP1_PM_PATCH4_DATA_LO	0x02BC21D0
+#define CS35L41_DSP1_PM_PATCH4_DATA_HI	0x02BC21D4
+#define CS35L41_DSP1_PM_PATCH5_ADDR	0x02BC21D8
+#define CS35L41_DSP1_PM_PATCH5_EN	0x02BC21DC
+#define CS35L41_DSP1_PM_PATCH5_DATA_LO	0x02BC21E0
+#define CS35L41_DSP1_PM_PATCH5_DATA_HI	0x02BC21E4
+#define CS35L41_DSP1_PM_PATCH6_ADDR	0x02BC21E8
+#define CS35L41_DSP1_PM_PATCH6_EN	0x02BC21EC
+#define CS35L41_DSP1_PM_PATCH6_DATA_LO	0x02BC21F0
+#define CS35L41_DSP1_PM_PATCH6_DATA_HI	0x02BC21F4
+#define CS35L41_DSP1_PM_PATCH7_ADDR	0x02BC21F8
+#define CS35L41_DSP1_PM_PATCH7_EN	0x02BC21FC
+#define CS35L41_DSP1_PM_PATCH7_DATA_LO	0x02BC2200
+#define CS35L41_DSP1_PM_PATCH7_DATA_HI	0x02BC2204
+#define CS35L41_DSP1_MPU_XM_ACCESS0	0x02BC3000
+#define CS35L41_DSP1_MPU_YM_ACCESS0	0x02BC3004
+#define CS35L41_DSP1_MPU_WNDW_ACCESS0	0x02BC3008
+#define CS35L41_DSP1_MPU_XREG_ACCESS0	0x02BC300C
+#define CS35L41_DSP1_MPU_YREG_ACCESS0	0x02BC3014
+#define CS35L41_DSP1_MPU_XM_ACCESS1	0x02BC3018
+#define CS35L41_DSP1_MPU_YM_ACCESS1	0x02BC301C
+#define CS35L41_DSP1_MPU_WNDW_ACCESS1	0x02BC3020
+#define CS35L41_DSP1_MPU_XREG_ACCESS1	0x02BC3024
+#define CS35L41_DSP1_MPU_YREG_ACCESS1	0x02BC302C
+#define CS35L41_DSP1_MPU_XM_ACCESS2	0x02BC3030
+#define CS35L41_DSP1_MPU_YM_ACCESS2	0x02BC3034
+#define CS35L41_DSP1_MPU_WNDW_ACCESS2	0x02BC3038
+#define CS35L41_DSP1_MPU_XREG_ACCESS2	0x02BC303C
+#define CS35L41_DSP1_MPU_YREG_ACCESS2	0x02BC3044
+#define CS35L41_DSP1_MPU_XM_ACCESS3	0x02BC3048
+#define CS35L41_DSP1_MPU_YM_ACCESS3	0x02BC304C
+#define CS35L41_DSP1_MPU_WNDW_ACCESS3	0x02BC3050
+#define CS35L41_DSP1_MPU_XREG_ACCESS3	0x02BC3054
+#define CS35L41_DSP1_MPU_YREG_ACCESS3	0x02BC305C
+#define CS35L41_DSP1_MPU_XM_VIO_ADDR	0x02BC3100
+#define CS35L41_DSP1_MPU_XM_VIO_STATUS	0x02BC3104
+#define CS35L41_DSP1_MPU_YM_VIO_ADDR	0x02BC3108
+#define CS35L41_DSP1_MPU_YM_VIO_STATUS	0x02BC310C
+#define CS35L41_DSP1_MPU_PM_VIO_ADDR	0x02BC3110
+#define CS35L41_DSP1_MPU_PM_VIO_STATUS	0x02BC3114
+#define CS35L41_DSP1_MPU_LOCK_CONFIG	0x02BC3140
+#define CS35L41_DSP1_MPU_WDT_RST_CTRL	0x02BC3180
+#define CS35L41_DSP1_STRMARB_MSTR0_CFG0	0x02BC5000
+#define CS35L41_DSP1_STRMARB_MSTR0_CFG1	0x02BC5004
+#define CS35L41_DSP1_STRMARB_MSTR0_CFG2	0x02BC5008
+#define CS35L41_DSP1_STRMARB_MSTR1_CFG0	0x02BC5010
+#define CS35L41_DSP1_STRMARB_MSTR1_CFG1	0x02BC5014
+#define CS35L41_DSP1_STRMARB_MSTR1_CFG2	0x02BC5018
+#define CS35L41_DSP1_STRMARB_MSTR2_CFG0	0x02BC5020
+#define CS35L41_DSP1_STRMARB_MSTR2_CFG1	0x02BC5024
+#define CS35L41_DSP1_STRMARB_MSTR2_CFG2	0x02BC5028
+#define CS35L41_DSP1_STRMARB_MSTR3_CFG0	0x02BC5030
+#define CS35L41_DSP1_STRMARB_MSTR3_CFG1	0x02BC5034
+#define CS35L41_DSP1_STRMARB_MSTR3_CFG2	0x02BC5038
+#define CS35L41_DSP1_STRMARB_MSTR4_CFG0	0x02BC5040
+#define CS35L41_DSP1_STRMARB_MSTR4_CFG1	0x02BC5044
+#define CS35L41_DSP1_STRMARB_MSTR4_CFG2	0x02BC5048
+#define CS35L41_DSP1_STRMARB_MSTR5_CFG0	0x02BC5050
+#define CS35L41_DSP1_STRMARB_MSTR5_CFG1	0x02BC5054
+#define CS35L41_DSP1_STRMARB_MSTR5_CFG2	0x02BC5058
+#define CS35L41_DSP1_STRMARB_MSTR6_CFG0	0x02BC5060
+#define CS35L41_DSP1_STRMARB_MSTR6_CFG1	0x02BC5064
+#define CS35L41_DSP1_STRMARB_MSTR6_CFG2	0x02BC5068
+#define CS35L41_DSP1_STRMARB_MSTR7_CFG0	0x02BC5070
+#define CS35L41_DSP1_STRMARB_MSTR7_CFG1	0x02BC5074
+#define CS35L41_DSP1_STRMARB_MSTR7_CFG2	0x02BC5078
+#define CS35L41_DSP1_STRMARB_TX0_CFG0	0x02BC5200
+#define CS35L41_DSP1_STRMARB_TX0_CFG1	0x02BC5204
+#define CS35L41_DSP1_STRMARB_TX1_CFG0	0x02BC5208
+#define CS35L41_DSP1_STRMARB_TX1_CFG1	0x02BC520C
+#define CS35L41_DSP1_STRMARB_TX2_CFG0	0x02BC5210
+#define CS35L41_DSP1_STRMARB_TX2_CFG1	0x02BC5214
+#define CS35L41_DSP1_STRMARB_TX3_CFG0	0x02BC5218
+#define CS35L41_DSP1_STRMARB_TX3_CFG1	0x02BC521C
+#define CS35L41_DSP1_STRMARB_TX4_CFG0	0x02BC5220
+#define CS35L41_DSP1_STRMARB_TX4_CFG1	0x02BC5224
+#define CS35L41_DSP1_STRMARB_TX5_CFG0	0x02BC5228
+#define CS35L41_DSP1_STRMARB_TX5_CFG1	0x02BC522C
+#define CS35L41_DSP1_STRMARB_TX6_CFG0	0x02BC5230
+#define CS35L41_DSP1_STRMARB_TX6_CFG1	0x02BC5234
+#define CS35L41_DSP1_STRMARB_TX7_CFG0	0x02BC5238
+#define CS35L41_DSP1_STRMARB_TX7_CFG1	0x02BC523C
+#define CS35L41_DSP1_STRMARB_RX0_CFG0	0x02BC5400
+#define CS35L41_DSP1_STRMARB_RX0_CFG1	0x02BC5404
+#define CS35L41_DSP1_STRMARB_RX1_CFG0	0x02BC5408
+#define CS35L41_DSP1_STRMARB_RX1_CFG1	0x02BC540C
+#define CS35L41_DSP1_STRMARB_RX2_CFG0	0x02BC5410
+#define CS35L41_DSP1_STRMARB_RX2_CFG1	0x02BC5414
+#define CS35L41_DSP1_STRMARB_RX3_CFG0	0x02BC5418
+#define CS35L41_DSP1_STRMARB_RX3_CFG1	0x02BC541C
+#define CS35L41_DSP1_STRMARB_RX4_CFG0	0x02BC5420
+#define CS35L41_DSP1_STRMARB_RX4_CFG1	0x02BC5424
+#define CS35L41_DSP1_STRMARB_RX5_CFG0	0x02BC5428
+#define CS35L41_DSP1_STRMARB_RX5_CFG1	0x02BC542C
+#define CS35L41_DSP1_STRMARB_RX6_CFG0	0x02BC5430
+#define CS35L41_DSP1_STRMARB_RX6_CFG1	0x02BC5434
+#define CS35L41_DSP1_STRMARB_RX7_CFG0	0x02BC5438
+#define CS35L41_DSP1_STRMARB_RX7_CFG1	0x02BC543C
+#define CS35L41_DSP1_STRMARB_IRQ0_CFG0	0x02BC5600
+#define CS35L41_DSP1_STRMARB_IRQ0_CFG1	0x02BC5604
+#define CS35L41_DSP1_STRMARB_IRQ0_CFG2	0x02BC5608
+#define CS35L41_DSP1_STRMARB_IRQ1_CFG0	0x02BC5610
+#define CS35L41_DSP1_STRMARB_IRQ1_CFG1	0x02BC5614
+#define CS35L41_DSP1_STRMARB_IRQ1_CFG2	0x02BC5618
+#define CS35L41_DSP1_STRMARB_IRQ2_CFG0	0x02BC5620
+#define CS35L41_DSP1_STRMARB_IRQ2_CFG1	0x02BC5624
+#define CS35L41_DSP1_STRMARB_IRQ2_CFG2	0x02BC5628
+#define CS35L41_DSP1_STRMARB_IRQ3_CFG0	0x02BC5630
+#define CS35L41_DSP1_STRMARB_IRQ3_CFG1	0x02BC5634
+#define CS35L41_DSP1_STRMARB_IRQ3_CFG2	0x02BC5638
+#define CS35L41_DSP1_STRMARB_IRQ4_CFG0	0x02BC5640
+#define CS35L41_DSP1_STRMARB_IRQ4_CFG1	0x02BC5644
+#define CS35L41_DSP1_STRMARB_IRQ4_CFG2	0x02BC5648
+#define CS35L41_DSP1_STRMARB_IRQ5_CFG0	0x02BC5650
+#define CS35L41_DSP1_STRMARB_IRQ5_CFG1	0x02BC5654
+#define CS35L41_DSP1_STRMARB_IRQ5_CFG2	0x02BC5658
+#define CS35L41_DSP1_STRMARB_IRQ6_CFG0	0x02BC5660
+#define CS35L41_DSP1_STRMARB_IRQ6_CFG1	0x02BC5664
+#define CS35L41_DSP1_STRMARB_IRQ6_CFG2	0x02BC5668
+#define CS35L41_DSP1_STRMARB_IRQ7_CFG0	0x02BC5670
+#define CS35L41_DSP1_STRMARB_IRQ7_CFG1	0x02BC5674
+#define CS35L41_DSP1_STRMARB_IRQ7_CFG2	0x02BC5678
+#define CS35L41_DSP1_STRMARB_RESYNC_MSK	0x02BC5A00
+#define CS35L41_DSP1_STRMARB_ERR_STATUS	0x02BC5A08
+#define CS35L41_DSP1_INTPCTL_RES_STATIC	0x02BC6000
+#define CS35L41_DSP1_INTPCTL_RES_DYN	0x02BC6004
+#define CS35L41_DSP1_INTPCTL_NMI_CTRL	0x02BC6008
+#define CS35L41_DSP1_INTPCTL_IRQ_INV	0x02BC6010
+#define CS35L41_DSP1_INTPCTL_IRQ_MODE	0x02BC6014
+#define CS35L41_DSP1_INTPCTL_IRQ_EN	0x02BC6018
+#define CS35L41_DSP1_INTPCTL_IRQ_MSK	0x02BC601C
+#define CS35L41_DSP1_INTPCTL_IRQ_FLUSH	0x02BC6020
+#define CS35L41_DSP1_INTPCTL_IRQ_MSKCLR	0x02BC6024
+#define CS35L41_DSP1_INTPCTL_IRQ_FRC	0x02BC6028
+#define CS35L41_DSP1_INTPCTL_IRQ_MSKSET	0x02BC602C
+#define CS35L41_DSP1_INTPCTL_IRQ_ERR	0x02BC6030
+#define CS35L41_DSP1_INTPCTL_IRQ_PEND	0x02BC6034
+#define CS35L41_DSP1_INTPCTL_IRQ_GEN	0x02BC6038
+#define CS35L41_DSP1_INTPCTL_TESTBITS	0x02BC6040
+#define CS35L41_DSP1_WDT_CONTROL	0x02BC7000
+#define CS35L41_DSP1_WDT_STATUS		0x02BC7008
+#define CS35L41_DSP1_YMEM_PACK_0	0x02C00000
+#define CS35L41_DSP1_YMEM_PACK_1532	0x02C017F0
+#define CS35L41_DSP1_YMEM_UNPACK32_0	0x03000000
+#define CS35L41_DSP1_YMEM_UNPACK32_1022	0x03000FF8
+#define CS35L41_DSP1_YMEM_UNPACK24_0	0x03400000
+#define CS35L41_DSP1_YMEM_UNPACK24_2045	0x03401FF4
+#define CS35L41_DSP1_PMEM_0		0x03800000
+#define CS35L41_DSP1_PMEM_5114		0x03804FE8
+
+/*test regs for emulation bringup*/
+#define CS35L41_PLL_OVR			0x00003018
+#define CS35L41_BST_TEST_DUTY		0x00003900
+#define CS35L41_DIGPWM_IOCTRL		0x0000706C
+
+/*registers populated by OTP*/
+#define CS35L41_OTP_TRIM_1		0x0000208c
+#define CS35L41_OTP_TRIM_2		0x00002090
+#define CS35L41_OTP_TRIM_3		0x00003010
+#define CS35L41_OTP_TRIM_4		0x0000300C
+#define CS35L41_OTP_TRIM_5		0x0000394C
+#define CS35L41_OTP_TRIM_6		0x00003950
+#define CS35L41_OTP_TRIM_7		0x00003954
+#define CS35L41_OTP_TRIM_8		0x00003958
+#define CS35L41_OTP_TRIM_9		0x0000395C
+#define CS35L41_OTP_TRIM_10		0x0000416C
+#define CS35L41_OTP_TRIM_11		0x00004160
+#define CS35L41_OTP_TRIM_12		0x00004170
+#define CS35L41_OTP_TRIM_13		0x00004360
+#define CS35L41_OTP_TRIM_14		0x00004448
+#define CS35L41_OTP_TRIM_15		0x0000444C
+#define CS35L41_OTP_TRIM_16		0x00006E30
+#define CS35L41_OTP_TRIM_17		0x00006E34
+#define CS35L41_OTP_TRIM_18		0x00006E38
+#define CS35L41_OTP_TRIM_19		0x00006E3C
+#define CS35L41_OTP_TRIM_20		0x00006E40
+#define CS35L41_OTP_TRIM_21		0x00006E44
+#define CS35L41_OTP_TRIM_22		0x00006E48
+#define CS35L41_OTP_TRIM_23		0x00006E4C
+#define CS35L41_OTP_TRIM_24		0x00006E50
+#define CS35L41_OTP_TRIM_25		0x00006E54
+#define CS35L41_OTP_TRIM_26		0x00006E58
+#define CS35L41_OTP_TRIM_27		0x00006E5C
+#define CS35L41_OTP_TRIM_28		0x00006E60
+#define CS35L41_OTP_TRIM_29		0x00006E64
+#define CS35L41_OTP_TRIM_30		0x00007418
+#define CS35L41_OTP_TRIM_31		0x0000741C
+#define CS35L41_OTP_TRIM_32		0x00007434
+#define CS35L41_OTP_TRIM_33		0x00007068
+#define CS35L41_OTP_TRIM_34		0x0000410C
+#define CS35L41_OTP_TRIM_35		0x0000400C
+#define CS35L41_OTP_TRIM_36		0x00002030
+
+#define CS35L41_MAX_CACHE_REG		36
+#define CS35L41_OTP_SIZE_WORDS		32
+#define CS35L41_NUM_OTP_ELEM		100
+#define CS35L41_NUM_OTP_MAPS		5
+
+#define CS35L41_VALID_PDATA		0x80000000
+#define CS35L41_NUM_SUPPLIES            2
+
+#define CS35L41_SCLK_MSTR_MASK		0x10
+#define CS35L41_SCLK_MSTR_SHIFT		4
+#define CS35L41_LRCLK_MSTR_MASK		0x01
+#define CS35L41_LRCLK_MSTR_SHIFT	0
+#define CS35L41_SCLK_INV_MASK		0x40
+#define CS35L41_SCLK_INV_SHIFT		6
+#define CS35L41_LRCLK_INV_MASK		0x04
+#define CS35L41_LRCLK_INV_SHIFT		2
+#define CS35L41_SCLK_FRC_MASK		0x20
+#define CS35L41_SCLK_FRC_SHIFT		5
+#define CS35L41_LRCLK_FRC_MASK		0x02
+#define CS35L41_LRCLK_FRC_SHIFT		1
+
+#define CS35L41_AMP_GAIN_PCM_MASK	0x3E0
+#define CS35L41_AMP_GAIN_ZC_MASK	0x0400
+#define CS35L41_AMP_GAIN_ZC_SHIFT	10
+
+#define CS35L41_BST_CTL_MASK		0xFF
+#define CS35L41_BST_CTL_SEL_MASK	0x03
+#define CS35L41_BST_CTL_SEL_REG		0x00
+#define CS35L41_BST_CTL_SEL_CLASSH	0x01
+#define CS35L41_BST_IPK_MASK		0x7F
+#define CS35L41_BST_IPK_SHIFT		0
+#define CS35L41_BST_LIM_MASK		0x4
+#define CS35L41_BST_LIM_SHIFT		2
+#define CS35L41_BST_K1_MASK		0x000000FF
+#define CS35L41_BST_K1_SHIFT		0
+#define CS35L41_BST_K2_MASK		0x0000FF00
+#define CS35L41_BST_K2_SHIFT		8
+#define CS35L41_BST_SLOPE_MASK		0x0000FF00
+#define CS35L41_BST_SLOPE_SHIFT		8
+#define CS35L41_BST_LBST_VAL_MASK	0x00000003
+#define CS35L41_BST_LBST_VAL_SHIFT	0
+
+#define CS35L41_TEMP_THLD_MASK		0x03
+#define CS35L41_VMON_IMON_VOL_MASK	0x07FF07FF
+#define CS35L41_PDM_MODE_MASK		0x01
+#define CS35L41_PDM_MODE_SHIFT		0
+
+#define CS35L41_CH_MEM_DEPTH_MASK	0x07
+#define CS35L41_CH_MEM_DEPTH_SHIFT	0
+#define CS35L41_CH_HDRM_CTL_MASK	0x007F0000
+#define CS35L41_CH_HDRM_CTL_SHIFT	16
+#define CS35L41_CH_REL_RATE_MASK	0xFF00
+#define CS35L41_CH_REL_RATE_SHIFT	8
+#define CS35L41_CH_WKFET_DLY_MASK	0x001C
+#define CS35L41_CH_WKFET_DLY_SHIFT	2
+#define CS35L41_CH_WKFET_THLD_MASK	0x0F00
+#define CS35L41_CH_WKFET_THLD_SHIFT	8
+
+#define CS35L41_HW_NG_SEL_MASK		0x3F00
+#define CS35L41_HW_NG_SEL_SHIFT		8
+#define CS35L41_HW_NG_DLY_MASK		0x0070
+#define CS35L41_HW_NG_DLY_SHIFT		4
+#define CS35L41_HW_NG_THLD_MASK		0x0007
+#define CS35L41_HW_NG_THLD_SHIFT	0
+
+#define CS35L41_DSP_NG_ENABLE_MASK	0x00010000
+#define CS35L41_DSP_NG_ENABLE_SHIFT	16
+#define CS35L41_DSP_NG_THLD_MASK	0x7
+#define CS35L41_DSP_NG_THLD_SHIFT	0
+#define CS35L41_DSP_NG_DELAY_MASK	0x0F00
+#define CS35L41_DSP_NG_DELAY_SHIFT	8
+
+#define CS35L41_ASP_FMT_MASK		0x0700
+#define CS35L41_ASP_FMT_SHIFT		8
+#define CS35L41_ASP_DOUT_HIZ_MASK	0x03
+#define CS35L41_ASP_DOUT_HIZ_SHIFT	0
+#define CS35L41_ASP_WIDTH_16		0x10
+#define CS35L41_ASP_WIDTH_24		0x18
+#define CS35L41_ASP_WIDTH_32		0x20
+#define CS35L41_ASP_WIDTH_TX_MASK	0xFF0000
+#define CS35L41_ASP_WIDTH_TX_SHIFT	16
+#define CS35L41_ASP_WIDTH_RX_MASK	0xFF000000
+#define CS35L41_ASP_WIDTH_RX_SHIFT	24
+#define CS35L41_ASP_RX1_SLOT_MASK	0x3F
+#define CS35L41_ASP_RX1_SLOT_SHIFT	0
+#define CS35L41_ASP_RX2_SLOT_MASK	0x3F00
+#define CS35L41_ASP_RX2_SLOT_SHIFT	8
+#define CS35L41_ASP_RX_WL_MASK		0x3F
+#define CS35L41_ASP_TX_WL_MASK		0x3F
+#define CS35L41_ASP_RX_WL_SHIFT		0
+#define CS35L41_ASP_TX_WL_SHIFT		0
+#define CS35L41_ASP_SOURCE_MASK		0x7F
+
+#define CS35L41_INPUT_SRC_ASPRX1	0x08
+#define CS35L41_INPUT_SRC_ASPRX2	0x09
+#define CS35L41_INPUT_SRC_VMON		0x18
+#define CS35L41_INPUT_SRC_IMON		0x19
+#define CS35L41_INPUT_SRC_CLASSH	0x21
+#define CS35L41_INPUT_SRC_VPMON		0x28
+#define CS35L41_INPUT_SRC_VBSTMON	0x29
+#define CS35L41_INPUT_SRC_TEMPMON	0x3A
+#define CS35L41_INPUT_SRC_RSVD		0x3B
+#define CS35L41_INPUT_DSP_TX1		0x32
+#define CS35L41_INPUT_DSP_TX2		0x33
+
+#define CS35L41_PLL_CLK_SEL_MASK	0x07
+#define CS35L41_PLL_CLK_SEL_SHIFT	0
+#define CS35L41_PLL_CLK_EN_MASK		0x10
+#define CS35L41_PLL_CLK_EN_SHIFT	4
+#define CS35L41_PLL_OPENLOOP_MASK	0x0800
+#define CS35L41_PLL_OPENLOOP_SHIFT	11
+#define CS35L41_PLLSRC_SCLK		0
+#define CS35L41_PLLSRC_LRCLK		1
+#define CS35L41_PLLSRC_SELF		3
+#define CS35L41_PLLSRC_PDMCLK		4
+#define CS35L41_PLLSRC_MCLK		5
+#define CS35L41_PLLSRC_SWIRE		7
+#define CS35L41_REFCLK_FREQ_MASK	0x7E0
+#define CS35L41_REFCLK_FREQ_SHIFT	5
+
+#define CS35L41_GLOBAL_FS_MASK		0x1F
+#define CS35L41_GLOBAL_FS_SHIFT		0
+
+#define CS35L41_GLOBAL_EN_MASK		0x01
+#define CS35L41_GLOBAL_EN_SHIFT		0
+#define CS35L41_BST_EN_MASK		0x0030
+#define CS35L41_BST_EN_SHIFT		4
+#define CS35L41_BST_EN_DEFAULT		0x2
+#define CS35L41_AMP_EN_SHIFT		0
+#define CS35L41_AMP_EN_MASK		1
+
+#define CS35L41_PDN_DONE_MASK		0x00800000
+#define CS35L41_PDN_DONE_SHIFT		23
+#define CS35L41_PUP_DONE_MASK		0x01000000
+#define CS35L41_PUP_DONE_SHIFT		24
+
+#define CS35L36_PUP_DONE_IRQ_UNMASK	0x5F
+#define CS35L36_PUP_DONE_IRQ_MASK	0xBF
+
+#define CS35L41_AMP_SHORT_ERR		0x80000000
+#define CS35L41_BST_SHORT_ERR		0x0100
+#define CS35L41_TEMP_WARN		0x8000
+#define CS35L41_TEMP_ERR		0x00020000
+#define CS35L41_BST_OVP_ERR		0x40
+#define CS35L41_BST_DCM_UVP_ERR		0x80
+#define CS35L41_OTP_BOOT_DONE		0x02
+#define CS35L41_PLL_UNLOCK		0x10
+#define CS35L41_OTP_BOOT_ERR		0x80000000
+
+#define CS35L41_AMP_SHORT_ERR_RLS	0x02
+#define CS35L41_BST_SHORT_ERR_RLS	0x04
+#define CS35L41_BST_OVP_ERR_RLS		0x08
+#define CS35L41_BST_UVP_ERR_RLS		0x10
+#define CS35L41_TEMP_WARN_ERR_RLS	0x20
+#define CS35L41_TEMP_ERR_RLS		0x40
+
+#define CS35L41_INT1_MASK_DEFAULT	0x7FFCFE3F
+#define CS35L41_INT1_UNMASK_PUP		0xFEFFFFFF
+#define CS35L41_INT1_UNMASK_PDN		0xFF7FFFFF
+
+#define CS35L41_GPIO_DIR_MASK		0x80000000
+#define CS35L41_GPIO_DIR_SHIFT		31
+#define CS35L41_GPIO1_CTRL_MASK		0x00030000
+#define CS35L41_GPIO1_CTRL_SHIFT	16
+#define CS35L41_GPIO2_CTRL_MASK		0x07000000
+#define CS35L41_GPIO2_CTRL_SHIFT	24
+#define CS35L41_GPIO_CTRL_OPEN_INT	2
+#define CS35L41_GPIO_CTRL_ACTV_LO	4
+#define CS35L41_GPIO_CTRL_ACTV_HI	5
+#define CS35L41_GPIO_POL_MASK		0x1000
+#define CS35L41_GPIO_POL_SHIFT		12
+
+#define CS35L41_AMP_INV_PCM_SHIFT	14
+#define CS35L41_AMP_INV_PCM_MASK	BIT(CS35L41_AMP_INV_PCM_SHIFT)
+#define CS35L41_AMP_PCM_VOL_SHIFT	3
+#define CS35L41_AMP_PCM_VOL_MASK	(0x7FF << 3)
+#define CS35L41_AMP_PCM_VOL_MUTE	0x4CF
+
+#define CS35L41_CHIP_ID			0x35a40
+#define CS35L41R_CHIP_ID		0x35b40
+#define CS35L41_MTLREVID_MASK		0x0F
+#define CS35L41_REVID_A0		0xA0
+#define CS35L41_REVID_B0		0xB0
+#define CS35L41_REVID_B2		0xB2
+
+#define CS35L41_HALO_CORE_RESET		0x00000200
+
+#define CS35L41_FS1_WINDOW_MASK		0x000007FF
+#define CS35L41_FS2_WINDOW_MASK		0x00FFF800
+#define CS35L41_FS2_WINDOW_SHIFT	12
+
+#define CS35L41_SPI_MAX_FREQ		4000000
+#define CS35L41_REGSTRIDE		4
+
 enum cs35l41_clk_ids {
 	CS35L41_CLKID_SCLK = 0,
 	CS35L41_CLKID_LRCLK = 1,
@@ -31,4 +746,22 @@ struct cs35l41_platform_data {
 	struct cs35l41_irq_cfg irq_config2;
 };
 
+struct cs35l41_otp_packed_element_t {
+	u32 reg;
+	u8 shift;
+	u8 size;
+};
+
+struct cs35l41_otp_map_element_t {
+	u32 id;
+	u32 num_elements;
+	const struct cs35l41_otp_packed_element_t *map;
+	u32 bit_offset;
+	u32 word_offset;
+};
+
+extern const struct cs35l41_otp_map_element_t cs35l41_otp_map_map[CS35L41_NUM_OTP_MAPS];
+extern struct regmap_config cs35l41_regmap_i2c;
+extern struct regmap_config cs35l41_regmap_spi;
+
 #endif /* __CS35L41_H */
-- 
cgit v1.2.3


From fe120d4cb6f6cd03007239e7c578b8703fe6d336 Mon Sep 17 00:00:00 2001
From: Lucas Tanure <tanureal@opensource.cirrus.com>
Date: Fri, 17 Dec 2021 11:57:00 +0000
Subject: ASoC: cs35l41: Move cs35l41_otp_unpack to shared code

ASoC and HDA will do the same cs35l41_otp_unpack, so move it
to shared code

Signed-off-by: Lucas Tanure <tanureal@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20211217115708.882525-3-tanureal@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/cs35l41.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h
index aac3ffb9bc89..6cf3ef02b26a 100644
--- a/include/sound/cs35l41.h
+++ b/include/sound/cs35l41.h
@@ -534,7 +534,6 @@
 #define CS35L41_MAX_CACHE_REG		36
 #define CS35L41_OTP_SIZE_WORDS		32
 #define CS35L41_NUM_OTP_ELEM		100
-#define CS35L41_NUM_OTP_MAPS		5
 
 #define CS35L41_VALID_PDATA		0x80000000
 #define CS35L41_NUM_SUPPLIES            2
@@ -760,8 +759,9 @@ struct cs35l41_otp_map_element_t {
 	u32 word_offset;
 };
 
-extern const struct cs35l41_otp_map_element_t cs35l41_otp_map_map[CS35L41_NUM_OTP_MAPS];
 extern struct regmap_config cs35l41_regmap_i2c;
 extern struct regmap_config cs35l41_regmap_spi;
 
+int cs35l41_otp_unpack(struct device *dev, struct regmap *regmap);
+
 #endif /* __CS35L41_H */
-- 
cgit v1.2.3


From 8b2278604b6de27329ec7ed82ca696c4751111b6 Mon Sep 17 00:00:00 2001
From: Lucas Tanure <tanureal@opensource.cirrus.com>
Date: Fri, 17 Dec 2021 11:57:02 +0000
Subject: ASoC: cs35l41: Create shared function for errata patches

ASoC and HDA systems require the same errata patches, so
move it to the shared code using a function the correctly
applies the patches by revision

Also, move CS35L41_DSP1_CCM_CORE_CTRL write to errata
patch function as is required to be written at boot,
but not in regmap_register_patch sequence as will affect
waking up from hibernation

Signed-off-by: Lucas Tanure <tanureal@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20211217115708.882525-5-tanureal@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/cs35l41.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h
index 6cf3ef02b26a..ad2e32a12b8c 100644
--- a/include/sound/cs35l41.h
+++ b/include/sound/cs35l41.h
@@ -763,5 +763,6 @@ extern struct regmap_config cs35l41_regmap_i2c;
 extern struct regmap_config cs35l41_regmap_spi;
 
 int cs35l41_otp_unpack(struct device *dev, struct regmap *regmap);
+int cs35l41_register_errata_patch(struct device *dev, struct regmap *reg, unsigned int reg_revid);
 
 #endif /* __CS35L41_H */
-- 
cgit v1.2.3


From 3bc3e3da657f17c14df8ae8fab58183407bd7521 Mon Sep 17 00:00:00 2001
From: Lucas Tanure <tanureal@opensource.cirrus.com>
Date: Fri, 17 Dec 2021 11:57:03 +0000
Subject: ASoC: cs35l41: Create shared function for setting channels

ASoC and HDA will use the same register to set channels
for the device

Signed-off-by: Lucas Tanure <tanureal@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20211217115708.882525-6-tanureal@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/cs35l41.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h
index ad2e32a12b8c..39d150f61382 100644
--- a/include/sound/cs35l41.h
+++ b/include/sound/cs35l41.h
@@ -764,5 +764,8 @@ extern struct regmap_config cs35l41_regmap_spi;
 
 int cs35l41_otp_unpack(struct device *dev, struct regmap *regmap);
 int cs35l41_register_errata_patch(struct device *dev, struct regmap *reg, unsigned int reg_revid);
+int cs35l41_set_channels(struct device *dev, struct regmap *reg,
+			 unsigned int tx_num, unsigned int *tx_slot,
+			 unsigned int rx_num, unsigned int *rx_slot);
 
 #endif /* __CS35L41_H */
-- 
cgit v1.2.3


From e8e4fcc047c6e0c5411faeb8cc29aed2e5036a00 Mon Sep 17 00:00:00 2001
From: Lucas Tanure <tanureal@opensource.cirrus.com>
Date: Fri, 17 Dec 2021 11:57:04 +0000
Subject: ASoC: cs35l41: Create shared function for boost configuration

ASoC and HDA will use the same registers to configure
internal boost for the device

Signed-off-by: Lucas Tanure <tanureal@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20211217115708.882525-7-tanureal@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/cs35l41.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h
index 39d150f61382..29a527457b48 100644
--- a/include/sound/cs35l41.h
+++ b/include/sound/cs35l41.h
@@ -767,5 +767,7 @@ int cs35l41_register_errata_patch(struct device *dev, struct regmap *reg, unsign
 int cs35l41_set_channels(struct device *dev, struct regmap *reg,
 			 unsigned int tx_num, unsigned int *tx_slot,
 			 unsigned int rx_num, unsigned int *rx_slot);
+int cs35l41_boost_config(struct device *dev, struct regmap *regmap, int boost_ind, int boost_cap,
+			 int boost_ipk);
 
 #endif /* __CS35L41_H */
-- 
cgit v1.2.3


From 9a45ac2320d0a6ae01880a30d4b86025fce4061b Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 22 Dec 2021 22:41:18 -0500
Subject: fbdev: fbmem: add a helper to determine if an aperture is used by a
 fw fb

Add a function for drivers to check if the a firmware initialized
fb is corresponds to their aperture.  This allows drivers to check if the
device corresponds to what the firmware set up as the display device.

Bug: https://bugzilla.kernel.org/show_bug.cgi?id=215203
Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1840
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/linux/fb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index 6f3db99ab990..3da95842b207 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -610,6 +610,7 @@ extern int remove_conflicting_pci_framebuffers(struct pci_dev *pdev,
 					       const char *name);
 extern int remove_conflicting_framebuffers(struct apertures_struct *a,
 					   const char *name, bool primary);
+extern bool is_firmware_framebuffer(struct apertures_struct *a);
 extern int fb_prepare_logo(struct fb_info *fb_info, int rotate);
 extern int fb_show_logo(struct fb_info *fb_info, int rotate);
 extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size);
-- 
cgit v1.2.3


From 1b4e3f26f9f7553b260b8aed43967500961448a6 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Thu, 2 Dec 2021 15:06:14 +0000
Subject: mm: vmscan: Reduce throttling due to a failure to make progress

Mike Galbraith, Alexey Avramov and Darrick Wong all reported similar
problems due to reclaim throttling for excessive lengths of time.  In
Alexey's case, a memory hog that should go OOM quickly stalls for
several minutes before stalling.  In Mike and Darrick's cases, a small
memcg environment stalled excessively even though the system had enough
memory overall.

Commit 69392a403f49 ("mm/vmscan: throttle reclaim when no progress is
being made") introduced the problem although commit a19594ca4a8b
("mm/vmscan: increase the timeout if page reclaim is not making
progress") made it worse.  Systems at or near an OOM state that cannot
be recovered must reach OOM quickly and memcg should kill tasks if a
memcg is near OOM.

To address this, only stall for the first zone in the zonelist, reduce
the timeout to 1 tick for VMSCAN_THROTTLE_NOPROGRESS and only stall if
the scan control nr_reclaimed is 0, kswapd is still active and there
were excessive pages pending for writeback.  If kswapd has stopped
reclaiming due to excessive failures, do not stall at all so that OOM
triggers relatively quickly.  Similarly, if an LRU is simply congested,
only lightly throttle similar to NOPROGRESS.

Alexey's original case was the most straight forward

	for i in {1..3}; do tail /dev/zero; done

On vanilla 5.16-rc1, this test stalled heavily, after the patch the test
completes in a few seconds similar to 5.15.

Alexey's second test case added watching a youtube video while tail runs
10 times.  On 5.15, playback only jitters slightly, 5.16-rc1 stalls a
lot with lots of frames missing and numerous audio glitches.  With this
patch applies, the video plays similarly to 5.15.

[lkp@intel.com: Fix W=1 build warning]

Link: https://lore.kernel.org/r/99e779783d6c7fce96448a3402061b9dc1b3b602.camel@gmx.de
Link: https://lore.kernel.org/r/20211124011954.7cab9bb4@mail.inbox.lv
Link: https://lore.kernel.org/r/20211022144651.19914-1-mgorman@techsingularity.net
Link: https://lore.kernel.org/r/20211202150614.22440-1-mgorman@techsingularity.net
Link: https://linux-regtracking.leemhuis.info/regzbot/regression/20211124011954.7cab9bb4@mail.inbox.lv/
Reported-and-tested-by: Alexey Avramov <hakavlad@inbox.lv>
Reported-and-tested-by: Mike Galbraith <efault@gmx.de>
Reported-and-tested-by: Darrick J. Wong <djwong@kernel.org>
Reported-by: kernel test robot <lkp@intel.com>
Acked-by: Hugh Dickins <hughd@google.com>
Tracked-by: Thorsten Leemhuis <regressions@leemhuis.info>
Fixes: 69392a403f49 ("mm/vmscan: throttle reclaim when no progress is being made")
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h        | 1 +
 include/trace/events/vmscan.h | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 58e744b78c2c..936dc0b6c226 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -277,6 +277,7 @@ enum vmscan_throttle_state {
 	VMSCAN_THROTTLE_WRITEBACK,
 	VMSCAN_THROTTLE_ISOLATED,
 	VMSCAN_THROTTLE_NOPROGRESS,
+	VMSCAN_THROTTLE_CONGESTED,
 	NR_VMSCAN_THROTTLE,
 };
 
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index f25a6149d3ba..ca2e9009a651 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -30,12 +30,14 @@
 #define _VMSCAN_THROTTLE_WRITEBACK	(1 << VMSCAN_THROTTLE_WRITEBACK)
 #define _VMSCAN_THROTTLE_ISOLATED	(1 << VMSCAN_THROTTLE_ISOLATED)
 #define _VMSCAN_THROTTLE_NOPROGRESS	(1 << VMSCAN_THROTTLE_NOPROGRESS)
+#define _VMSCAN_THROTTLE_CONGESTED	(1 << VMSCAN_THROTTLE_CONGESTED)
 
 #define show_throttle_flags(flags)						\
 	(flags) ? __print_flags(flags, "|",					\
 		{_VMSCAN_THROTTLE_WRITEBACK,	"VMSCAN_THROTTLE_WRITEBACK"},	\
 		{_VMSCAN_THROTTLE_ISOLATED,	"VMSCAN_THROTTLE_ISOLATED"},	\
-		{_VMSCAN_THROTTLE_NOPROGRESS,	"VMSCAN_THROTTLE_NOPROGRESS"}	\
+		{_VMSCAN_THROTTLE_NOPROGRESS,	"VMSCAN_THROTTLE_NOPROGRESS"},	\
+		{_VMSCAN_THROTTLE_CONGESTED,	"VMSCAN_THROTTLE_CONGESTED"}	\
 		) : "VMSCAN_THROTTLE_NONE"
 
 
-- 
cgit v1.2.3


From 79d39fc503b43b566feae5bc9a57dfcffdf41bd1 Mon Sep 17 00:00:00 2001
From: Tony Lu <tonylu@linux.alibaba.com>
Date: Tue, 28 Dec 2021 21:06:10 +0800
Subject: net/smc: Add netlink net namespace support

This adds net namespace ID to diag of linkgroup, helps us to distinguish
different namespaces, and net_cookie is unique in the whole system.

Signed-off-by: Tony Lu <tonylu@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/smc.h      |  2 ++
 include/uapi/linux/smc_diag.h | 11 ++++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index 20f33b27787f..6c2874fd2c00 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -119,6 +119,8 @@ enum {
 	SMC_NLA_LGR_R_CONNS_NUM,	/* u32 */
 	SMC_NLA_LGR_R_V2_COMMON,	/* nest */
 	SMC_NLA_LGR_R_V2,		/* nest */
+	SMC_NLA_LGR_R_NET_COOKIE,	/* u64 */
+	SMC_NLA_LGR_R_PAD,		/* flag */
 	__SMC_NLA_LGR_R_MAX,
 	SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1
 };
diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h
index 8cb3a6fef553..c7008d87f1a4 100644
--- a/include/uapi/linux/smc_diag.h
+++ b/include/uapi/linux/smc_diag.h
@@ -84,11 +84,12 @@ struct smc_diag_conninfo {
 /* SMC_DIAG_LINKINFO */
 
 struct smc_diag_linkinfo {
-	__u8 link_id;			/* link identifier */
-	__u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */
-	__u8 ibport;			/* RDMA device port number */
-	__u8 gid[40];			/* local GID */
-	__u8 peer_gid[40];		/* peer GID */
+	__u8		link_id;		    /* link identifier */
+	__u8		ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */
+	__u8		ibport;			    /* RDMA device port number */
+	__u8		gid[40];		    /* local GID */
+	__u8		peer_gid[40];		    /* peer GID */
+	__aligned_u64	net_cookie;                 /* RDMA device net namespace */
 };
 
 struct smc_diag_lgrinfo {
-- 
cgit v1.2.3


From f9d31c4cf4c11ff10317f038b9c6f7c3bda6cdd4 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 31 Dec 2021 18:37:37 -0500
Subject: sctp: hold endpoint before calling cb in
 sctp_transport_lookup_process

The same fix in commit 5ec7d18d1813 ("sctp: use call_rcu to free endpoint")
is also needed for dumping one asoc and sock after the lookup.

Fixes: 86fdb3448cc1 ("sctp: ensure ep is not destroyed before doing the dump")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index d314a180ab93..3ae61ce2eabd 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -112,8 +112,7 @@ struct sctp_transport *sctp_transport_get_next(struct net *net,
 			struct rhashtable_iter *iter);
 struct sctp_transport *sctp_transport_get_idx(struct net *net,
 			struct rhashtable_iter *iter, int pos);
-int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
-				  struct net *net,
+int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net,
 				  const union sctp_addr *laddr,
 				  const union sctp_addr *paddr, void *p);
 int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done,
-- 
cgit v1.2.3


From e7026f15564fbe0c8b091f218203111f77b84eda Mon Sep 17 00:00:00 2001
From: Colin Foster <colin.foster@in-advantage.com>
Date: Tue, 28 Dec 2021 21:03:06 -0800
Subject: net: phy: lynx: refactor Lynx PCS module to use generic phylink_pcs

Remove references to lynx_pcs structures so drivers like the Felix DSA
can reference alternate PCS drivers.

Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pcs-lynx.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/pcs-lynx.h b/include/linux/pcs-lynx.h
index a6440d6ebe95..5712cc2ce775 100644
--- a/include/linux/pcs-lynx.h
+++ b/include/linux/pcs-lynx.h
@@ -9,13 +9,10 @@
 #include <linux/mdio.h>
 #include <linux/phylink.h>
 
-struct lynx_pcs {
-	struct phylink_pcs pcs;
-	struct mdio_device *mdio;
-};
+struct mdio_device *lynx_get_mdio_device(struct phylink_pcs *pcs);
 
-struct lynx_pcs *lynx_pcs_create(struct mdio_device *mdio);
+struct phylink_pcs *lynx_pcs_create(struct mdio_device *mdio);
 
-void lynx_pcs_destroy(struct lynx_pcs *pcs);
+void lynx_pcs_destroy(struct phylink_pcs *pcs);
 
 #endif /* __LINUX_PCS_LYNX_H */
-- 
cgit v1.2.3


From ece014141cd4b49f2d763f28b19e417b84460560 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 3 May 2021 07:29:47 -0400
Subject: mm/doc: Add documentation for folio_test_uptodate

Move the PG_uptodate documentation to be documentation for
folio_test_uptodate() and expand on it a little.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/page-flags.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index b5f14d581113..b3d353d537e2 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -68,9 +68,6 @@
  * might lose their PG_swapbacked flag when they simply can be dropped (e.g. as
  * a result of MADV_FREE).
  *
- * PG_uptodate tells whether the page's contents is valid.  When a read
- * completes, the page becomes uptodate, unless a disk I/O error happened.
- *
  * PG_referenced, PG_reclaim are used for page reclaim for anonymous and
  * file-backed pagecache (see mm/vmscan.c).
  *
@@ -615,6 +612,16 @@ TESTPAGEFLAG_FALSE(Ksm, ksm)
 
 u64 stable_page_flags(struct page *page);
 
+/**
+ * folio_test_uptodate - Is this folio up to date?
+ * @folio: The folio.
+ *
+ * The uptodate flag is set on a folio when every byte in the folio is
+ * at least as new as the corresponding bytes on storage.  Anonymous
+ * and CoW folios are always uptodate.  If the folio is not uptodate,
+ * some of the bytes in it may be; see the is_partially_uptodate()
+ * address_space operation.
+ */
 static inline bool folio_test_uptodate(struct folio *folio)
 {
 	bool ret = test_bit(PG_uptodate, folio_flags(folio, 0));
-- 
cgit v1.2.3


From 10331795fb7991a39ebd0330fdb074cbd81fef48 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 6 Dec 2021 15:24:51 -0500
Subject: pagevec: Add folio_batch

The folio_batch is the same as the pagevec, except that it is typed
to contain folios and not pages.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/pagevec.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

(limited to 'include')

diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 7f3f19065a9f..c3fa616d7ae7 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -15,8 +15,10 @@
 #define PAGEVEC_SIZE	15
 
 struct page;
+struct folio;
 struct address_space;
 
+/* Layout must match folio_batch */
 struct pagevec {
 	unsigned char nr;
 	bool percpu_pvec_drained;
@@ -81,4 +83,71 @@ static inline void pagevec_release(struct pagevec *pvec)
 		__pagevec_release(pvec);
 }
 
+/**
+ * struct folio_batch - A collection of folios.
+ *
+ * The folio_batch is used to amortise the cost of retrieving and
+ * operating on a set of folios.  The order of folios in the batch may be
+ * significant (eg delete_from_page_cache_batch()).  Some users of the
+ * folio_batch store "exceptional" entries in it which can be removed
+ * by calling folio_batch_remove_exceptionals().
+ */
+struct folio_batch {
+	unsigned char nr;
+	bool percpu_pvec_drained;
+	struct folio *folios[PAGEVEC_SIZE];
+};
+
+/* Layout must match pagevec */
+static_assert(sizeof(struct pagevec) == sizeof(struct folio_batch));
+static_assert(offsetof(struct pagevec, pages) ==
+		offsetof(struct folio_batch, folios));
+
+/**
+ * folio_batch_init() - Initialise a batch of folios
+ * @fbatch: The folio batch.
+ *
+ * A freshly initialised folio_batch contains zero folios.
+ */
+static inline void folio_batch_init(struct folio_batch *fbatch)
+{
+	fbatch->nr = 0;
+}
+
+static inline unsigned int folio_batch_count(struct folio_batch *fbatch)
+{
+	return fbatch->nr;
+}
+
+static inline unsigned int fbatch_space(struct folio_batch *fbatch)
+{
+	return PAGEVEC_SIZE - fbatch->nr;
+}
+
+/**
+ * folio_batch_add() - Add a folio to a batch.
+ * @fbatch: The folio batch.
+ * @folio: The folio to add.
+ *
+ * The folio is added to the end of the batch.
+ * The batch must have previously been initialised using folio_batch_init().
+ *
+ * Return: The number of slots still available.
+ */
+static inline unsigned folio_batch_add(struct folio_batch *fbatch,
+		struct folio *folio)
+{
+	fbatch->folios[fbatch->nr++] = folio;
+	return fbatch_space(fbatch);
+}
+
+static inline void folio_batch_release(struct folio_batch *fbatch)
+{
+	pagevec_release((struct pagevec *)fbatch);
+}
+
+static inline void folio_batch_remove_exceptionals(struct folio_batch *fbatch)
+{
+	pagevec_remove_exceptionals((struct pagevec *)fbatch);
+}
 #endif /* _LINUX_PAGEVEC_H */
-- 
cgit v1.2.3


From e66d70c034dbdfe1a48863f0865ac86aaf2fef1a Mon Sep 17 00:00:00 2001
From: Vinod Koul <vkoul@kernel.org>
Date: Mon, 13 Dec 2021 10:51:41 +0530
Subject: dmaengine: xilinx_dpdma: use correct SDPX tag for header file

Commit 188c310bdd5d ("dmaengine: xilinx_dpdma: stop using slave_id
field") add the header file with incorrect format for SPDX tag, fix that

WARNING: Improper SPDX comment style for 'include/linux/dma/xilinx_dpdma.h', please use '/*' instead
#1: FILE: include/linux/dma/xilinx_dpdma.h:1:
+// SPDX-License-Identifier: GPL-2.0

WARNING: Missing or malformed SPDX-License-Identifier tag in line 1
#1: FILE: include/linux/dma/xilinx_dpdma.h:1:
+// SPDX-License-Identifier: GPL-2.0

Fixes: 188c310bdd5d ("dmaengine: xilinx_dpdma: stop using slave_id field")
Signed-off-by: Vinod Koul <vkoul@kernel.org>
Link: https://lore.kernel.org/r/20211213052141.850807-1-vkoul@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/dma/xilinx_dpdma.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dma/xilinx_dpdma.h b/include/linux/dma/xilinx_dpdma.h
index 83a1377f03f8..02a4adf8921b 100644
--- a/include/linux/dma/xilinx_dpdma.h
+++ b/include/linux/dma/xilinx_dpdma.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __LINUX_DMA_XILINX_DPDMA_H
 #define __LINUX_DMA_XILINX_DPDMA_H
 
-- 
cgit v1.2.3


From 04ce4a6b9b7b84eb6be7b544d3d0e748b6837764 Mon Sep 17 00:00:00 2001
From: Aswath Govindraju <a-govindraju@ti.com>
Date: Sun, 2 Jan 2022 23:38:12 +0100
Subject: dt-bindings: ti-serdes-mux: Add defines for J721S2 SoC

There are 4 lanes in the single instance of J721S2 SERDES. Each SERDES
lane mux can select upto 4 different IPs. Define all the possible
functions.

Signed-off-by: Aswath Govindraju <a-govindraju@ti.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Peter Rosin <peda@axentia.se>
Link: https://lore.kernel.org/r/0571fd6b-ec4d-71b3-5cf7-6fa48ed5592c@axentia.se
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/dt-bindings/mux/ti-serdes.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/mux/ti-serdes.h b/include/dt-bindings/mux/ti-serdes.h
index d417b9268b16..d3116c52ab72 100644
--- a/include/dt-bindings/mux/ti-serdes.h
+++ b/include/dt-bindings/mux/ti-serdes.h
@@ -95,4 +95,26 @@
 #define AM64_SERDES0_LANE0_PCIE0		0x0
 #define AM64_SERDES0_LANE0_USB			0x1
 
+/* J721S2 */
+
+#define J721S2_SERDES0_LANE0_EDP_LANE0		0x0
+#define J721S2_SERDES0_LANE0_PCIE1_LANE0	0x1
+#define J721S2_SERDES0_LANE0_IP3_UNUSED		0x2
+#define J721S2_SERDES0_LANE0_IP4_UNUSED		0x3
+
+#define J721S2_SERDES0_LANE1_EDP_LANE1		0x0
+#define J721S2_SERDES0_LANE1_PCIE1_LANE1	0x1
+#define J721S2_SERDES0_LANE1_USB		0x2
+#define J721S2_SERDES0_LANE1_IP4_UNUSED		0x3
+
+#define J721S2_SERDES0_LANE2_EDP_LANE2		0x0
+#define J721S2_SERDES0_LANE2_PCIE1_LANE2	0x1
+#define J721S2_SERDES0_LANE2_IP3_UNUSED		0x2
+#define J721S2_SERDES0_LANE2_IP4_UNUSED		0x3
+
+#define J721S2_SERDES0_LANE3_EDP_LANE3		0x0
+#define J721S2_SERDES0_LANE3_PCIE1_LANE3	0x1
+#define J721S2_SERDES0_LANE3_USB		0x2
+#define J721S2_SERDES0_LANE3_IP4_UNUSED		0x3
+
 #endif /* _DT_BINDINGS_MUX_TI_SERDES */
-- 
cgit v1.2.3


From ccae4a19c9140a34a0c5f0658812496dd8bbdeaf Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 25 Oct 2021 17:31:54 +0100
Subject: btrfs: remove no longer needed logic for replaying directory deletes

Now that we log only dir index keys when logging a directory, we no longer
need to deal with dir item keys in the log replay code for replaying
directory deletes. This is also true for the case when we replay a log
tree created by a kernel that still logs dir items.

So remove the remaining code of the replay of directory deletes algorithm
that deals with dir item keys.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/uapi/linux/btrfs_tree.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index e1c4c732aaba..5416f1f1a77a 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -146,7 +146,9 @@
 
 /*
  * dir items are the name -> inode pointers in a directory.  There is one
- * for every name in a directory.
+ * for every name in a directory.  BTRFS_DIR_LOG_ITEM_KEY is no longer used
+ * but it's still defined here for documentation purposes and to help avoid
+ * having its numerical value reused in the future.
  */
 #define BTRFS_DIR_LOG_ITEM_KEY  60
 #define BTRFS_DIR_LOG_INDEX_KEY 72
-- 
cgit v1.2.3


From 2e4e97abac4c95f8b87b2912ea013f7836a6f10b Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 5 Nov 2021 16:45:29 -0400
Subject: btrfs: pass fs_info to trace_btrfs_transaction_commit

The root on the trans->root can be anything, and generally we're
committing from the transaction kthread so it's usually the tree_root.
Change this to just take an fs_info, and to maintain compatibility
simply put the ROOT_TREE_OBJECTID as the root objectid for the
tracepoint.  This will allow use to remove trans->root.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 8f58fd95efc7..0d729664b4b4 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -182,18 +182,18 @@ FLUSH_STATES
 
 TRACE_EVENT(btrfs_transaction_commit,
 
-	TP_PROTO(const struct btrfs_root *root),
+	TP_PROTO(const struct btrfs_fs_info *fs_info),
 
-	TP_ARGS(root),
+	TP_ARGS(fs_info),
 
 	TP_STRUCT__entry_btrfs(
 		__field(	u64,  generation		)
 		__field(	u64,  root_objectid		)
 	),
 
-	TP_fast_assign_btrfs(root->fs_info,
-		__entry->generation	= root->fs_info->generation;
-		__entry->root_objectid	= root->root_key.objectid;
+	TP_fast_assign_btrfs(fs_info,
+		__entry->generation	= fs_info->generation;
+		__entry->root_objectid	= BTRFS_ROOT_TREE_OBJECTID;
 	),
 
 	TP_printk_btrfs("root=%llu(%s) gen=%llu",
-- 
cgit v1.2.3


From 25fd330370ac40653671f323acc7fb6db27ef6fe Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 15 Dec 2021 02:01:18 +0100
Subject: power: supply_core: Pass pointer to battery info

The function to retrieve battery info (from the device tree) assumes
we have a static info struct that gets populated by calling into
power_supply_get_battery_info().

This is awkward since I want to support tables of static battery
info by just assigning a pointer to all info based on e.g. a
compatible value in the device tree.

We also have a mixture of static and dynamically allocated
variables here.

Bite the bullet and let power_supply_get_battery_info() allocate
also the memory used for the very top level
struct power_supply_battery_info container. Pass pointers
around and lifecycle this with the psy device just like the
stuff we allocate inside it.

Change all current users over.

As part of the change, initializers need to be added to some
previously uninitialized fields in struct
power_supply_battery_info.

Reviewed-By: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 include/linux/power_supply.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index f6e94eae4f28..86b4d5c4dab9 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -575,7 +575,7 @@ devm_power_supply_get_by_phandle(struct device *dev, const char *property)
 #endif /* CONFIG_OF */
 
 extern int power_supply_get_battery_info(struct power_supply *psy,
-					 struct power_supply_battery_info *info);
+					 struct power_supply_battery_info **info_out);
 extern void power_supply_put_battery_info(struct power_supply *psy,
 					  struct power_supply_battery_info *info);
 extern int power_supply_ocv2cap_simple(struct power_supply_battery_ocv_table *table,
-- 
cgit v1.2.3


From d5dbcca70182501bed99de85c224cef04c38ed92 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 Jan 2022 17:24:08 +0100
Subject: pktcdvd: convert to use attribute groups

There is no need to create kobject children of the pktcdvd device just
to display a subdirectory name.  Instead, use a named attribute group
which removes the extra kobjects and also fixes the userspace race where
the device is created yet tools like libudev can not see the attributes
as they think the subdirectories are some other sort of device.

Cc: linux-block@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220103162408.742003-1-gregkh@linuxfoundation.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/pktcdvd.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h
index c391e694aa26..f9c5ac80d59b 100644
--- a/include/linux/pktcdvd.h
+++ b/include/linux/pktcdvd.h
@@ -152,14 +152,6 @@ struct packet_stacked_data
 };
 #define PSD_POOL_SIZE		64
 
-struct pktcdvd_kobj
-{
-	struct kobject		kobj;
-	struct pktcdvd_device	*pd;
-};
-#define to_pktcdvdkobj(_k) \
-  ((struct pktcdvd_kobj*)container_of(_k,struct pktcdvd_kobj,kobj))
-
 struct pktcdvd_device
 {
 	struct block_device	*bdev;		/* dev attached */
@@ -197,8 +189,6 @@ struct pktcdvd_device
 	int			write_congestion_on;
 
 	struct device		*dev;		/* sysfs pktcdvd[0-7] dev */
-	struct pktcdvd_kobj	*kobj_stat;	/* sysfs pktcdvd[0-7]/stat/     */
-	struct pktcdvd_kobj	*kobj_wqueue;	/* sysfs pktcdvd[0-7]/write_queue/ */
 
 	struct dentry		*dfs_d_root;	/* debugfs: devname directory */
 	struct dentry		*dfs_f_info;	/* debugfs: info file */
-- 
cgit v1.2.3


From 6c952a0dc9c3ced98c4c8aa7cd11c25c59157f1f Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 21 Dec 2021 08:20:26 +0100
Subject: ata: libata: Add ata_port_classify() helper

Add an ata_port_classify() helper to print out the results from
the device classification and remove the debugging statements
from ata_dev_classify().

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 include/linux/libata.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 2a8404b26083..235fdbeb19ea 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1160,6 +1160,8 @@ extern enum ata_completion_errors ata_noop_qc_prep(struct ata_queued_cmd *qc);
 extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg,
 		 unsigned int n_elem);
 extern unsigned int ata_dev_classify(const struct ata_taskfile *tf);
+extern unsigned int ata_port_classify(struct ata_port *ap,
+				      const struct ata_taskfile *tf);
 extern void ata_dev_disable(struct ata_device *adev);
 extern void ata_id_string(const u16 *id, unsigned char *s,
 			  unsigned int ofs, unsigned int len);
-- 
cgit v1.2.3


From f8ec26d0f5bcdb864f771fb6d250d9ed3165eb61 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 21 Dec 2021 08:20:29 +0100
Subject: ata: libata: add reset tracepoints

To follow the flow of control we should be using tracepoints, as
they will tie in with the actual I/O flow and deliver a better
overview about what it happening.
This patch adds tracepoints for hard reset, soft reset, and postreset
and adds them in the libata-eh control flow.
With that we can drop the reset DPRINTK calls in the various drivers.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 include/trace/events/libata.h | 96 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h
index ab69434e2329..ec2a350d1aca 100644
--- a/include/trace/events/libata.h
+++ b/include/trace/events/libata.h
@@ -132,6 +132,22 @@
 		ata_protocol_name(ATAPI_PROT_PIO),	\
 		ata_protocol_name(ATAPI_PROT_DMA))
 
+#define ata_class_name(class)	{ class, #class }
+#define show_class_name(val)				\
+	__print_symbolic(val,				\
+		ata_class_name(ATA_DEV_UNKNOWN),	\
+		ata_class_name(ATA_DEV_ATA),		\
+		ata_class_name(ATA_DEV_ATA_UNSUP),	\
+		ata_class_name(ATA_DEV_ATAPI),		\
+		ata_class_name(ATA_DEV_ATAPI_UNSUP),	\
+		ata_class_name(ATA_DEV_PMP),		\
+		ata_class_name(ATA_DEV_PMP_UNSUP),	\
+		ata_class_name(ATA_DEV_SEMB),		\
+		ata_class_name(ATA_DEV_SEMB_UNSUP),	\
+		ata_class_name(ATA_DEV_ZAC),		\
+		ata_class_name(ATA_DEV_ZAC_UNSUP),	\
+		ata_class_name(ATA_DEV_NONE))
+
 const char *libata_trace_parse_status(struct trace_seq*, unsigned char);
 #define __parse_status(s) libata_trace_parse_status(p, s)
 
@@ -329,6 +345,86 @@ TRACE_EVENT(ata_eh_link_autopsy_qc,
 		  __parse_eh_err_mask(__entry->eh_err_mask))
 );
 
+DECLARE_EVENT_CLASS(ata_link_reset_begin_template,
+
+	TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline),
+
+	TP_ARGS(link, class, deadline),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	ata_port )
+		__array( unsigned int,	class, 2 )
+		__field( unsigned long,	deadline )
+	),
+
+	TP_fast_assign(
+		__entry->ata_port	= link->ap->print_id;
+		memcpy(__entry->class, class, 2);
+		__entry->deadline	= deadline;
+	),
+
+	TP_printk("ata_port=%u deadline=%lu classes=[%s,%s]",
+		  __entry->ata_port, __entry->deadline,
+		  show_class_name(__entry->class[0]),
+		  show_class_name(__entry->class[1]))
+);
+
+DEFINE_EVENT(ata_link_reset_begin_template, ata_link_hardreset_begin,
+	     TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline),
+	     TP_ARGS(link, class, deadline));
+
+DEFINE_EVENT(ata_link_reset_begin_template, ata_slave_hardreset_begin,
+	     TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline),
+	     TP_ARGS(link, class, deadline));
+
+DEFINE_EVENT(ata_link_reset_begin_template, ata_link_softreset_begin,
+	     TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline),
+	     TP_ARGS(link, class, deadline));
+
+DECLARE_EVENT_CLASS(ata_link_reset_end_template,
+
+	TP_PROTO(struct ata_link *link, unsigned int *class, int rc),
+
+	TP_ARGS(link, class, rc),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	ata_port )
+		__array( unsigned int,	class, 2 )
+		__field( int,		rc	)
+	),
+
+	TP_fast_assign(
+		__entry->ata_port	= link->ap->print_id;
+		memcpy(__entry->class, class, 2);
+		__entry->rc		= rc;
+	),
+
+	TP_printk("ata_port=%u rc=%d class=[%s,%s]",
+		  __entry->ata_port, __entry->rc,
+		  show_class_name(__entry->class[0]),
+		  show_class_name(__entry->class[1]))
+);
+
+DEFINE_EVENT(ata_link_reset_end_template, ata_link_hardreset_end,
+	     TP_PROTO(struct ata_link *link, unsigned int *class, int rc),
+	     TP_ARGS(link, class, rc));
+
+DEFINE_EVENT(ata_link_reset_end_template, ata_slave_hardreset_end,
+	     TP_PROTO(struct ata_link *link, unsigned int *class, int rc),
+	     TP_ARGS(link, class, rc));
+
+DEFINE_EVENT(ata_link_reset_end_template, ata_link_softreset_end,
+	     TP_PROTO(struct ata_link *link, unsigned int *class, int rc),
+	     TP_ARGS(link, class, rc));
+
+DEFINE_EVENT(ata_link_reset_end_template, ata_link_postreset,
+	     TP_PROTO(struct ata_link *link, unsigned int *class, int rc),
+	     TP_ARGS(link, class, rc));
+
+DEFINE_EVENT(ata_link_reset_end_template, ata_slave_postreset,
+	     TP_PROTO(struct ata_link *link, unsigned int *class, int rc),
+	     TP_ARGS(link, class, rc));
+
 #endif /*  _TRACE_LIBATA_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From fc914faad67f237528739ec717222a560c97e723 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 21 Dec 2021 08:20:30 +0100
Subject: ata: libata: add qc_prep tracepoint

Convert the existing ata_qc_issue() tracepoint into a template,
and add tracepoints for ata_qc_prep() and ata_qc_issue() based
on that template.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 include/trace/events/libata.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h
index ec2a350d1aca..2394fc2b2831 100644
--- a/include/trace/events/libata.h
+++ b/include/trace/events/libata.h
@@ -164,7 +164,7 @@ const char *libata_trace_parse_subcmd(struct trace_seq *, unsigned char,
 				      unsigned char, unsigned char);
 #define __parse_subcmd(c,f,h) libata_trace_parse_subcmd(p, c, f, h)
 
-TRACE_EVENT(ata_qc_issue,
+DECLARE_EVENT_CLASS(ata_qc_issue_template,
 
 	TP_PROTO(struct ata_queued_cmd *qc),
 
@@ -223,6 +223,14 @@ TRACE_EVENT(ata_qc_issue,
 		  __entry->dev)
 );
 
+DEFINE_EVENT(ata_qc_issue_template, ata_qc_prep,
+	     TP_PROTO(struct ata_queued_cmd *qc),
+	     TP_ARGS(qc));
+
+DEFINE_EVENT(ata_qc_issue_template, ata_qc_issue,
+	     TP_PROTO(struct ata_queued_cmd *qc),
+	     TP_ARGS(qc));
+
 DECLARE_EVENT_CLASS(ata_qc_complete_template,
 
 	TP_PROTO(struct ata_queued_cmd *qc),
-- 
cgit v1.2.3


From 9795ded7f924b6486e54976619b1b094fcc1969d Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Mon, 3 Jan 2022 13:44:50 +0200
Subject: net/sched: act_ct: Fill offloading tuple iifidx

Driver offloading ct tuples can use the information of which devices
received the packets that created the offloaded connections, to
more efficiently offload them only to the relevant device.

Add new act_ct nf conntrack extension, which is used to store the skb
devices before offloading the connection, and then fill in the tuple
iifindex so drivers can get the device via metadata dissector match.

Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
Signed-off-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_conntrack_act_ct.h | 50 +++++++++++++++++++++++++++++
 include/net/netfilter/nf_conntrack_extend.h |  4 +++
 2 files changed, 54 insertions(+)
 create mode 100644 include/net/netfilter/nf_conntrack_act_ct.h

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_act_ct.h b/include/net/netfilter/nf_conntrack_act_ct.h
new file mode 100644
index 000000000000..078d3c52c03f
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_act_ct.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NF_CONNTRACK_ACT_CT_H
+#define _NF_CONNTRACK_ACT_CT_H
+
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+
+struct nf_conn_act_ct_ext {
+	int ifindex[IP_CT_DIR_MAX];
+};
+
+static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_find(const struct nf_conn *ct)
+{
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+	return nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT);
+#else
+	return NULL;
+#endif
+}
+
+static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn *ct)
+{
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+	struct nf_conn_act_ct_ext *act_ct = nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT);
+
+	if (act_ct)
+		return act_ct;
+
+	act_ct = nf_ct_ext_add(ct, NF_CT_EXT_ACT_CT, GFP_ATOMIC);
+	return act_ct;
+#else
+	return NULL;
+#endif
+}
+
+static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct,
+					   enum ip_conntrack_info ctinfo)
+{
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+	struct nf_conn_act_ct_ext *act_ct_ext;
+
+	act_ct_ext = nf_conn_act_ct_ext_find(ct);
+	if (dev_net(skb->dev) == &init_net && act_ct_ext)
+		act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex;
+#endif
+}
+
+#endif /* _NF_CONNTRACK_ACT_CT_H */
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index e1e588387103..c7515d82ab06 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -27,6 +27,9 @@ enum nf_ct_ext_id {
 #endif
 #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
 	NF_CT_EXT_SYNPROXY,
+#endif
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+	NF_CT_EXT_ACT_CT,
 #endif
 	NF_CT_EXT_NUM,
 };
@@ -40,6 +43,7 @@ enum nf_ct_ext_id {
 #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout
 #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels
 #define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy
+#define NF_CT_EXT_ACT_CT_TYPE struct nf_conn_act_ct_ext
 
 /* Extensions: optional stuff which isn't permanently in struct. */
 struct nf_ct_ext {
-- 
cgit v1.2.3


From fa55a7d745de2d10489295b0674a403e2a5d490d Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Mon, 3 Jan 2022 18:11:30 +0100
Subject: seg6: export get_srh() for ICMP handling

An ICMP error message can contain in its message body part of an IPv6
packet which invoked the error. Such a packet might contain a segment
router header. Export get_srh() so the ICMP code can make use of it.

Since his changes the scope of the function from local to global, add
the seg6_ prefix to keep the namespace clean. And move it into seg6.c
so it is always available, not just when IPV6_SEG6_LWTUNNEL is
enabled.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/seg6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/seg6.h b/include/net/seg6.h
index 9d19c15e8545..a6f25983670a 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -58,6 +58,7 @@ extern int seg6_local_init(void);
 extern void seg6_local_exit(void);
 
 extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced);
+extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags);
 extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
 			     int proto);
 extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
-- 
cgit v1.2.3


From e41294408c56c68ea0f269d757527bf33b39118a Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Mon, 3 Jan 2022 18:11:31 +0100
Subject: icmp: ICMPV6: Examine invoking packet for Segment Route Headers.

RFC8754 says:

ICMP error packets generated within the SR domain are sent to source
nodes within the SR domain.  The invoking packet in the ICMP error
message may contain an SRH.  Since the destination address of a packet
with an SRH changes as each segment is processed, it may not be the
destination used by the socket or application that generated the
invoking packet.

For the source of an invoking packet to process the ICMP error
message, the ultimate destination address of the IPv6 header may be
required.  The following logic is used to determine the destination
address for use by protocol-error handlers.

*  Walk all extension headers of the invoking IPv6 packet to the
   routing extension header preceding the upper-layer header.

   -  If routing header is type 4 Segment Routing Header (SRH)

      o  The SID at Segment List[0] may be used as the destination
         address of the invoking packet.

Mangle the skb so the network header points to the invoking packet
inside the ICMP packet. The seg6 helpers can then be used on the skb
to find any segment routing headers. If found, mark this fact in the
IPv6 control block of the skb, and store the offset into the packet of
the SRH. Then restore the skb back to its old state.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h | 2 ++
 include/net/seg6.h   | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 20c1f968da7c..a59d25f19385 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -133,6 +133,7 @@ struct inet6_skb_parm {
 	__u16			dsthao;
 #endif
 	__u16			frag_max_size;
+	__u16			srhoff;
 
 #define IP6SKB_XFRM_TRANSFORMED	1
 #define IP6SKB_FORWARDED	2
@@ -142,6 +143,7 @@ struct inet6_skb_parm {
 #define IP6SKB_HOPBYHOP        32
 #define IP6SKB_L3SLAVE         64
 #define IP6SKB_JUMBOGRAM      128
+#define IP6SKB_SEG6	      256
 };
 
 #if defined(CONFIG_NET_L3_MASTER_DEV)
diff --git a/include/net/seg6.h b/include/net/seg6.h
index a6f25983670a..02b0cd305787 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -59,6 +59,7 @@ extern void seg6_local_exit(void);
 
 extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced);
 extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags);
+extern void seg6_icmp_srh(struct sk_buff *skb, struct inet6_skb_parm *opt);
 extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
 			     int proto);
 extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
-- 
cgit v1.2.3


From 222a011efc839ca1f51bf89fe7a2b3705fa55ccd Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Mon, 3 Jan 2022 18:11:32 +0100
Subject: udp6: Use Segment Routing Header for dest address if present

When finding the socket to report an error on, if the invoking packet
is using Segment Routing, the IPv6 destination address is that of an
intermediate router, not the end destination. Extract the ultimate
destination address from the segment address.

This change allows traceroute to function in the presence of Segment
Routing.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/seg6.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/net/seg6.h b/include/net/seg6.h
index 02b0cd305787..af668f17b398 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -65,4 +65,23 @@ extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
 extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
 extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
 			       u32 tbl_id);
+
+/* If the packet which invoked an ICMP error contains an SRH return
+ * the true destination address from within the SRH, otherwise use the
+ * destination address in the IP header.
+ */
+static inline const struct in6_addr *seg6_get_daddr(struct sk_buff *skb,
+						    struct inet6_skb_parm *opt)
+{
+	struct ipv6_sr_hdr *srh;
+
+	if (opt->flags & IP6SKB_SEG6) {
+		srh = (struct ipv6_sr_hdr *)(skb->data + opt->srhoff);
+		return  &srh->segments[0];
+	}
+
+	return NULL;
+}
+
+
 #endif
-- 
cgit v1.2.3


From 1de6b15a434c0068253fea5d719f71143e7e3a79 Mon Sep 17 00:00:00 2001
From: xu xin <xu.xin16@zte.com.cn>
Date: Tue, 4 Jan 2022 10:59:34 +0000
Subject: Namespaceify min_pmtu sysctl

This patch enables the sysctl min_pmtu to be configured per net
namespace.

Signed-off-by: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 6c5b2efc4f17..1ecbf82b07f1 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -85,6 +85,8 @@ struct netns_ipv4 {
 	int sysctl_icmp_ratelimit;
 	int sysctl_icmp_ratemask;
 
+	u32 ip_rt_min_pmtu;
+
 	struct local_ports ip_local_ports;
 
 	u8 sysctl_tcp_ecn;
-- 
cgit v1.2.3


From 1135fad204805518462c1f0caaca6bcd52ba78cf Mon Sep 17 00:00:00 2001
From: xu xin <xu.xin16@zte.com.cn>
Date: Tue, 4 Jan 2022 10:59:47 +0000
Subject: Namespaceify mtu_expires sysctl

This patch enables the sysctl mtu_expires to be configured per net
namespace.

Signed-off-by: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 1ecbf82b07f1..78557643526e 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -86,6 +86,7 @@ struct netns_ipv4 {
 	int sysctl_icmp_ratemask;
 
 	u32 ip_rt_min_pmtu;
+	int ip_rt_mtu_expires;
 
 	struct local_ports ip_local_ports;
 
-- 
cgit v1.2.3


From f083266487690124481eac0869da850406fb3ed3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 29 Aug 2021 09:18:53 +0200
Subject: headers/uninline: Uninline single-use function:
 kobject_has_children()

This was the only usage of <linux/kref_api.h> in <linux/kobject_api.h>,
so we'll able to decouple the two after this change.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kobject.h | 17 -----------------
 1 file changed, 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index ad90b49824dc..c7b47399b36a 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -117,23 +117,6 @@ extern void kobject_get_ownership(struct kobject *kobj,
 				  kuid_t *uid, kgid_t *gid);
 extern char *kobject_get_path(struct kobject *kobj, gfp_t flag);
 
-/**
- * kobject_has_children - Returns whether a kobject has children.
- * @kobj: the object to test
- *
- * This will return whether a kobject has other kobjects as children.
- *
- * It does NOT account for the presence of attribute files, only sub
- * directories. It also assumes there is no concurrent addition or
- * removal of such children, and thus relies on external locking.
- */
-static inline bool kobject_has_children(struct kobject *kobj)
-{
-	WARN_ON_ONCE(kref_read(&kobj->kref) == 0);
-
-	return kobj->sd && kobj->sd->dir.subdirs;
-}
-
 struct kobj_type {
 	void (*release)(struct kobject *kobj);
 	const struct sysfs_ops *sysfs_ops;
-- 
cgit v1.2.3


From 57a18322227134e37b693ef8ef216ed7ce7ba7d6 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 30 Dec 2021 20:31:19 +0100
Subject: ACPI / x86: Introduce an acpi_quirk_skip_acpi_ac_and_battery() helper

Some x86 ACPI boards have broken AC and battery ACPI devices in their ACPI
tables. This is often tied to these devices using certain PMICs where the
factory OS image seems to be using native charger and fuel-gauge drivers
instead.

So far both the AC and battery drivers have almost identical checks for
these PMICs including both of them having a DMI based mechanism to force
usage of the ACPI AC and battery drivers on some boards even though one
of these PMICs is present, with the same 2 boards listed in both driver's
DMI tables for this.

The only difference is that the AC driver checks for 2 PMICs and the
battery driver only for one. This has grown this way because the other
(Whiskey Cove) PMIC is only used on a few boards (3 known boards) and
although some of these do have non working ACPI battery devices, their
_STA method always returns 0, but that really should not be relied on.

This patch factors out the shared checks into a new
acpi_quirk_skip_acpi_ac_and_battery() helper and moves the AC and
battery drivers over to this new helper.

Note the DMI table is shared with acpi_quirk_skip_i2c_client_enumeration()
and acpi_quirk_skip_serdev_enumeration(), because boards needing DMI quirks
for either of these typically also have broken AC and battery ACPI devices.

The ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY quirk is not set yet on boards
already in this DMI table, to avoid introducing any functional changes
in this refactoring patch.

Besided sharing the code between the AC and battery drivers this
refactoring also moves this quirk handling to under #ifdef CONFIG_X86,
removing this x86 specific code from non x86 ACPI builds.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpi_bus.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 80c425bc98a7..04b9ace9689b 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -615,12 +615,17 @@ int acpi_disable_wakeup_device_power(struct acpi_device *dev);
 
 #ifdef CONFIG_X86
 bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status);
+bool acpi_quirk_skip_acpi_ac_and_battery(void);
 #else
 static inline bool acpi_device_override_status(struct acpi_device *adev,
 					       unsigned long long *status)
 {
 	return false;
 }
+static inline bool acpi_quirk_skip_acpi_ac_and_battery(void)
+{
+	return false;
+}
 #endif
 
 #if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS)
-- 
cgit v1.2.3


From acb99b9b2a08f439a13eafffd2a59b798d983462 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Tue, 28 Dec 2021 22:14:53 +0100
Subject: mac80211: Add stations iterator where the iterator function may sleep

ieee80211_iterate_active_interfaces() and
ieee80211_iterate_active_interfaces_atomic() already exist, where the
former allows the iterator function to sleep. Add
ieee80211_iterate_stations() which is similar to
ieee80211_iterate_stations_atomic() but allows the iterator to sleep.
This is needed for adding SDIO support to the rtw88 driver. Some
interators there are reading or writing registers. With the SDIO ops
(sdio_readb, sdio_writeb and friends) this means that the iterator
function may sleep.

Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Link: https://lore.kernel.org/r/20211228211501.468981-2-martin.blumenstingl@googlemail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 8907338d52b5..c50221d7e82c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -5614,6 +5614,9 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw,
  * This function iterates over the interfaces associated with a given
  * hardware that are currently active and calls the callback for them.
  * This version can only be used while holding the wiphy mutex.
+ * The driver must not call this with a lock held that it can also take in
+ * response to callbacks from mac80211, and it must not call this within
+ * callbacks made by mac80211 - both would result in deadlocks.
  *
  * @hw: the hardware struct of which the interfaces should be iterated over
  * @iter_flags: iteration flags, see &enum ieee80211_interface_iteration_flags
@@ -5627,6 +5630,24 @@ void ieee80211_iterate_active_interfaces_mtx(struct ieee80211_hw *hw,
 						struct ieee80211_vif *vif),
 					     void *data);
 
+/**
+ * ieee80211_iterate_stations - iterate stations
+ *
+ * This function iterates over all stations associated with a given
+ * hardware that are currently uploaded to the driver and calls the callback
+ * function for them.
+ * This function allows the iterator function to sleep, when the iterator
+ * function is atomic @ieee80211_iterate_stations_atomic can be used.
+ *
+ * @hw: the hardware struct of which the interfaces should be iterated over
+ * @iterator: the iterator function to call, cannot sleep
+ * @data: first argument of the iterator function
+ */
+void ieee80211_iterate_stations(struct ieee80211_hw *hw,
+				void (*iterator)(void *data,
+						 struct ieee80211_sta *sta),
+				void *data);
+
 /**
  * ieee80211_iterate_stations_atomic - iterate stations
  *
-- 
cgit v1.2.3


From 5bc03b28ec24670e67c453ecb2bfbb9053b86838 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 20 Dec 2021 11:51:47 +0100
Subject: nl80211: clarify comment for mesh PLINK_BLOCKED state

When a mesh link is in blocked state, it is very useful to still allow
auth requests from the peer to re-establish it.
When a remote node is power cycled, the peer state can easily end up
in blocked state if multiple auth attempts are performed. Since this
can lead to several minutes of downtime, we should accept auth attempts
of the peer after it has come back.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20211220105147.88625-1-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index f1a9d6594df2..195a238a322e 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -5623,7 +5623,7 @@ enum nl80211_if_combination_attrs {
  * @NL80211_PLINK_ESTAB: mesh peer link is established
  * @NL80211_PLINK_HOLDING: mesh peer link is being closed or cancelled
  * @NL80211_PLINK_BLOCKED: all frames transmitted from this mesh
- *	plink are discarded
+ *	plink are discarded, except for authentication frames
  * @NUM_NL80211_PLINK_STATES: number of peer link states
  * @MAX_NL80211_PLINK_STATES: highest numerical value of plink states
  */
-- 
cgit v1.2.3


From 6c9eeb5f4a9bb2b11a40fd0f15efde7bd33ee908 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 4 Jan 2022 17:19:40 +0200
Subject: KVM: arm64: vgic: Replace kernel.h with the necessary inclusions

arm_vgic.h does not require all the stuff that kernel.h provides.
Replace kernel.h inclusion with the list of what is really being used.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220104151940.55399-1-andriy.shevchenko@linux.intel.com
---
 include/kvm/arm_vgic.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index e602d848fc1a..bb30a6803d9f 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -5,9 +5,11 @@
 #ifndef __KVM_ARM_VGIC_H
 #define __KVM_ARM_VGIC_H
 
-#include <linux/kernel.h>
+#include <linux/bits.h>
 #include <linux/kvm.h>
 #include <linux/irqreturn.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/static_key.h>
 #include <linux/types.h>
-- 
cgit v1.2.3


From d9c19d32d86fa54934b632c4314beb067bf98378 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 18 Oct 2021 10:39:06 -0400
Subject: iov_iter: Add copy_folio_to_iter()

This wrapper around copy_page_to_iter() works because copy_page_to_iter()
handles compound pages correctly.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/uio.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 6350354f97e9..43321dbebba8 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -7,6 +7,7 @@
 
 #include <linux/kernel.h>
 #include <linux/thread_info.h>
+#include <linux/mm_types.h>
 #include <uapi/linux/uio.h>
 
 struct page;
@@ -146,6 +147,12 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
 
+static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset,
+		size_t bytes, struct iov_iter *i)
+{
+	return copy_page_to_iter(&folio->page, offset, bytes, i);
+}
+
 static __always_inline __must_check
 size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
-- 
cgit v1.2.3


From 5bf34d7c7ffe773c3b3c1b6ebf39e0f34a2436ec Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 28 Nov 2021 14:24:43 -0500
Subject: mm: Add folio_test_pmd_mappable()

Add a predicate to determine if the folio might be mapped by a PMD entry.
If CONFIG_TRANSPARENT_HUGEPAGE is disabled, we know it can't be, even
if it's large enough.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/huge_mm.h | 14 ++++++++++++++
 include/linux/mm.h      | 42 +++++++++++++++++++++---------------------
 2 files changed, 35 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index f280f33ff223..e4c18ba8d3bf 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -274,6 +274,15 @@ static inline int thp_nr_pages(struct page *page)
 	return 1;
 }
 
+/**
+ * folio_test_pmd_mappable - Can we map this folio with a PMD?
+ * @folio: The folio to test
+ */
+static inline bool folio_test_pmd_mappable(struct folio *folio)
+{
+	return folio_order(folio) >= HPAGE_PMD_ORDER;
+}
+
 struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 		pmd_t *pmd, int flags, struct dev_pagemap **pgmap);
 struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
@@ -339,6 +348,11 @@ static inline int thp_nr_pages(struct page *page)
 	return 1;
 }
 
+static inline bool folio_test_pmd_mappable(struct folio *folio)
+{
+	return false;
+}
+
 static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
 {
 	return false;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a7e4a9e7d807..72ca04f16711 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -714,6 +714,27 @@ int vma_is_stack_for_current(struct vm_area_struct *vma);
 struct mmu_gather;
 struct inode;
 
+static inline unsigned int compound_order(struct page *page)
+{
+	if (!PageHead(page))
+		return 0;
+	return page[1].compound_order;
+}
+
+/**
+ * folio_order - The allocation order of a folio.
+ * @folio: The folio.
+ *
+ * A folio is composed of 2^order pages.  See get_order() for the definition
+ * of order.
+ *
+ * Return: The order of the folio.
+ */
+static inline unsigned int folio_order(struct folio *folio)
+{
+	return compound_order(&folio->page);
+}
+
 #include <linux/huge_mm.h>
 
 /*
@@ -906,27 +927,6 @@ static inline void destroy_compound_page(struct page *page)
 	compound_page_dtors[page[1].compound_dtor](page);
 }
 
-static inline unsigned int compound_order(struct page *page)
-{
-	if (!PageHead(page))
-		return 0;
-	return page[1].compound_order;
-}
-
-/**
- * folio_order - The allocation order of a folio.
- * @folio: The folio.
- *
- * A folio is composed of 2^order pages.  See get_order() for the definition
- * of order.
- *
- * Return: The order of the folio.
- */
-static inline unsigned int folio_order(struct folio *folio)
-{
-	return compound_order(&folio->page);
-}
-
 static inline bool hpage_pincount_available(struct page *page)
 {
 	/*
-- 
cgit v1.2.3


From 9f2b04a25a41b1f41b3cead4f56854a4192ec5b0 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 16 Aug 2021 23:36:31 -0400
Subject: filemap: Add folio_put_wait_locked()

Convert all three callers of put_and_wait_on_page_locked() to
folio_put_wait_locked().  This shrinks the kernel overall by 19 bytes.
filemap_update_page() shrinks by 19 bytes while __migration_entry_wait()
is unchanged.  folio_put_wait_locked() is 14 bytes smaller than
put_and_wait_on_page_locked(), but pmd_migration_entry_wait() grows by
14 bytes.  It removes the assumption from pmd_migration_entry_wait()
that pages cannot be larger than a PMD (which is true today, but
may be interesting to explore in the future).

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/pagemap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 605246452305..841f7ba62d7d 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -868,7 +868,7 @@ static inline int wait_on_page_locked_killable(struct page *page)
 	return folio_wait_locked_killable(page_folio(page));
 }
 
-int put_and_wait_on_page_locked(struct page *page, int state);
+int folio_put_wait_locked(struct folio *folio, int state);
 void wait_on_page_writeback(struct page *page);
 void folio_wait_writeback(struct folio *folio);
 int folio_wait_writeback_killable(struct folio *folio);
-- 
cgit v1.2.3


From 621db4880d305bc37b343b1671e03b7eb5d61389 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 8 May 2021 20:04:05 -0400
Subject: filemap: Add filemap_unaccount_folio()

Replace unaccount_page_cache_page() with filemap_unaccount_folio().
The bug handling path could be a bit more robust (eg taking into account
the mapcounts of tail pages), but it's really never supposed to happen.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/pagemap.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 841f7ba62d7d..077b6f378666 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -884,11 +884,6 @@ static inline void __set_page_dirty(struct page *page,
 }
 void folio_account_cleaned(struct folio *folio, struct address_space *mapping,
 			  struct bdi_writeback *wb);
-static inline void account_page_cleaned(struct page *page,
-		struct address_space *mapping, struct bdi_writeback *wb)
-{
-	return folio_account_cleaned(page_folio(page), mapping, wb);
-}
 void __folio_cancel_dirty(struct folio *folio);
 static inline void folio_cancel_dirty(struct folio *folio)
 {
-- 
cgit v1.2.3


From a0580c6f9babaf4413c8a7e2ab21d68e31f4c754 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 23 Jul 2021 09:29:46 -0400
Subject: filemap: Convert tracing of page cache operations to folio

Pass the folio instead of a page.  The page was already implicitly a
folio as it accessed page->mapping directly.  Add the order of the folio
to the tracepoint, as this is important information.  Also drop printing
the address of the struct page as the pfn provides better information
than the struct page address.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/trace/events/filemap.h | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h
index c47b63db124e..46c89c1e460c 100644
--- a/include/trace/events/filemap.h
+++ b/include/trace/events/filemap.h
@@ -15,43 +15,45 @@
 
 DECLARE_EVENT_CLASS(mm_filemap_op_page_cache,
 
-	TP_PROTO(struct page *page),
+	TP_PROTO(struct folio *folio),
 
-	TP_ARGS(page),
+	TP_ARGS(folio),
 
 	TP_STRUCT__entry(
 		__field(unsigned long, pfn)
 		__field(unsigned long, i_ino)
 		__field(unsigned long, index)
 		__field(dev_t, s_dev)
+		__field(unsigned char, order)
 	),
 
 	TP_fast_assign(
-		__entry->pfn = page_to_pfn(page);
-		__entry->i_ino = page->mapping->host->i_ino;
-		__entry->index = page->index;
-		if (page->mapping->host->i_sb)
-			__entry->s_dev = page->mapping->host->i_sb->s_dev;
+		__entry->pfn = folio_pfn(folio);
+		__entry->i_ino = folio->mapping->host->i_ino;
+		__entry->index = folio->index;
+		if (folio->mapping->host->i_sb)
+			__entry->s_dev = folio->mapping->host->i_sb->s_dev;
 		else
-			__entry->s_dev = page->mapping->host->i_rdev;
+			__entry->s_dev = folio->mapping->host->i_rdev;
+		__entry->order = folio_order(folio);
 	),
 
-	TP_printk("dev %d:%d ino %lx page=%p pfn=0x%lx ofs=%lu",
+	TP_printk("dev %d:%d ino %lx pfn=0x%lx ofs=%lu order=%u",
 		MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
 		__entry->i_ino,
-		pfn_to_page(__entry->pfn),
 		__entry->pfn,
-		__entry->index << PAGE_SHIFT)
+		__entry->index << PAGE_SHIFT,
+		__entry->order)
 );
 
 DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_delete_from_page_cache,
-	TP_PROTO(struct page *page),
-	TP_ARGS(page)
+	TP_PROTO(struct folio *folio),
+	TP_ARGS(folio)
 	);
 
 DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_add_to_page_cache,
-	TP_PROTO(struct page *page),
-	TP_ARGS(page)
+	TP_PROTO(struct folio *folio),
+	TP_ARGS(folio)
 	);
 
 TRACE_EVENT(filemap_set_wb_err,
-- 
cgit v1.2.3


From 452e9e6992fe058a650c81d01a9982e3faf10278 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 9 May 2021 09:33:42 -0400
Subject: filemap: Add filemap_remove_folio and __filemap_remove_folio

Reimplement __delete_from_page_cache() as a wrapper around
__filemap_remove_folio() and delete_from_page_cache() as a wrapper
around filemap_remove_folio().  Remove the EXPORT_SYMBOL as
delete_from_page_cache() was not used by any in-tree modules.
Convert page_cache_free_page() into filemap_free_folio().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/pagemap.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 077b6f378666..3f26b191ede3 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -930,8 +930,13 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 		pgoff_t index, gfp_t gfp);
 int filemap_add_folio(struct address_space *mapping, struct folio *folio,
 		pgoff_t index, gfp_t gfp);
-extern void delete_from_page_cache(struct page *page);
-extern void __delete_from_page_cache(struct page *page, void *shadow);
+void filemap_remove_folio(struct folio *folio);
+void delete_from_page_cache(struct page *page);
+void __filemap_remove_folio(struct folio *folio, void *shadow);
+static inline void __delete_from_page_cache(struct page *page, void *shadow)
+{
+	__filemap_remove_folio(page_folio(page), shadow);
+}
 void replace_page_cache_page(struct page *old, struct page *new);
 void delete_from_page_cache_batch(struct address_space *mapping,
 				  struct pagevec *pvec);
-- 
cgit v1.2.3


From bb2e98b613a3c76c904dfa82eb4b86773817598b Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 28 Nov 2021 16:14:50 -0500
Subject: filemap: Remove thp_contains()

This function is now unused, so delete it.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/pagemap.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 3f26b191ede3..8c2cad7f0c36 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -512,15 +512,6 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
 			mapping_gfp_mask(mapping));
 }
 
-/* Does this page contain this index? */
-static inline bool thp_contains(struct page *head, pgoff_t index)
-{
-	/* HugeTLBfs indexes the page cache in units of hpage_size */
-	if (PageHuge(head))
-		return head->index == index;
-	return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL));
-}
-
 #define swapcache_index(folio)	__page_file_index(&(folio)->page)
 
 /**
-- 
cgit v1.2.3


From 7836d9990079ed611199819ccf487061b748193a Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 27 May 2021 12:30:54 -0400
Subject: readahead: Convert page_cache_async_ra() to take a folio

Using the folio here avoids checking whether it's a tail page.
This patch mostly just enables some of the following patches.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/pagemap.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 8c2cad7f0c36..30302be6977f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -993,7 +993,7 @@ struct readahead_control {
 void page_cache_ra_unbounded(struct readahead_control *,
 		unsigned long nr_to_read, unsigned long lookahead_count);
 void page_cache_sync_ra(struct readahead_control *, unsigned long req_count);
-void page_cache_async_ra(struct readahead_control *, struct page *,
+void page_cache_async_ra(struct readahead_control *, struct folio *,
 		unsigned long req_count);
 void readahead_expand(struct readahead_control *ractl,
 		      loff_t new_start, size_t new_len);
@@ -1040,7 +1040,7 @@ void page_cache_async_readahead(struct address_space *mapping,
 		struct page *page, pgoff_t index, unsigned long req_count)
 {
 	DEFINE_READAHEAD(ractl, file, ra, mapping, index);
-	page_cache_async_ra(&ractl, page, req_count);
+	page_cache_async_ra(&ractl, page_folio(page), req_count);
 }
 
 static inline struct folio *__readahead_folio(struct readahead_control *ractl)
-- 
cgit v1.2.3


From 539a3322f208db478db88c4a76239476defce6b1 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 16 Dec 2020 11:45:30 -0500
Subject: filemap: Add read_cache_folio and read_mapping_folio

Reimplement read_cache_page() as a wrapper around read_cache_folio().
Saves over 400 bytes of text from do_read_cache_folio() which more
than makes up for the extra 100 bytes of text added to the various
wrapper functions.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/pagemap.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 30302be6977f..7bef50ea5435 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -629,8 +629,10 @@ static inline struct page *grab_cache_page(struct address_space *mapping,
 	return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
 }
 
-extern struct page * read_cache_page(struct address_space *mapping,
-				pgoff_t index, filler_t *filler, void *data);
+struct folio *read_cache_folio(struct address_space *, pgoff_t index,
+		filler_t *filler, void *data);
+struct page *read_cache_page(struct address_space *, pgoff_t index,
+		filler_t *filler, void *data);
 extern struct page * read_cache_page_gfp(struct address_space *mapping,
 				pgoff_t index, gfp_t gfp_mask);
 extern int read_cache_pages(struct address_space *mapping,
@@ -642,6 +644,12 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
 	return read_cache_page(mapping, index, NULL, data);
 }
 
+static inline struct folio *read_mapping_folio(struct address_space *mapping,
+				pgoff_t index, void *data)
+{
+	return read_cache_folio(mapping, index, NULL, data);
+}
+
 /*
  * Get index of the page within radix-tree (but not for hugetlb pages).
  * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
-- 
cgit v1.2.3


From 82c50f8b443359ec99348cd9b1289f55cd47779d Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 28 Jul 2021 15:14:48 -0400
Subject: filemap: Add filemap_release_folio()

Reimplement try_to_release_page() as a wrapper around
filemap_release_folio().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/mm.h      | 1 -
 include/linux/pagemap.h | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 72ca04f16711..145f045b0ddc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1970,7 +1970,6 @@ int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
 			struct page **pages);
 struct page *get_dump_page(unsigned long addr);
 
-extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
 extern void do_invalidatepage(struct page *page, unsigned int offset,
 			      unsigned int length);
 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 7bef50ea5435..eb6e58e106c8 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -939,6 +939,8 @@ static inline void __delete_from_page_cache(struct page *page, void *shadow)
 void replace_page_cache_page(struct page *old, struct page *new);
 void delete_from_page_cache_batch(struct address_space *mapping,
 				  struct pagevec *pvec);
+int try_to_release_page(struct page *page, gfp_t gfp);
+bool filemap_release_folio(struct folio *folio, gfp_t gfp);
 loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end,
 		int whence);
 
-- 
cgit v1.2.3


From 77e2a04745ff8e391ad402e2d2d1157a5d3a7ebc Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Tue, 4 Jan 2022 19:51:08 +0000
Subject: ACPI: PCC: Implement OperationRegion handler for the PCC Type 3
 subtype

PCC OpRegion provides a mechanism to communicate with the platform
directly from the AML. PCCT provides the list of PCC channel available
in the platform, a subset or all of them can be used in PCC Opregion.

This patch registers the PCC OpRegion handler before ACPI tables are
loaded. This relies on the special context data passed to identify and
set up the PCC channel before the OpRegion handler is executed for the
first time.

Typical PCC Opregion declaration looks like this:

OperationRegion (PFRM, PCC, 2, 0x74)
Field (PFRM, ByteAcc, NoLock, Preserve)
{
    SIGN,   32,
    FLGS,   32,
    LEN,    32,
    CMD,    32,
    DATA,   800
}

It contains four named double words followed by 100 bytes of buffer
names DATA.

ASL can fill out the buffer something like:

    /* Create global or local buffer */
    Name (BUFF, Buffer (0x0C){})
    /* Create double word fields over the buffer */
    CreateDWordField (BUFF, 0x0, WD0)
    CreateDWordField (BUFF, 0x04, WD1)
    CreateDWordField (BUFF, 0x08, WD2)

    /* Fill the named fields */
    WD0 = 0x50434300
    SIGN = BUFF
    WD0 = 1
    FLGS = BUFF
    WD0 = 0x10
    LEN = BUFF

    /* Fill the payload in the DATA buffer */
    WD0 = 0
    WD1 = 0x08
    WD2 = 0
    DATA = BUFF

    /* Write to CMD field to trigger handler */
    WD0 = 0x4404
    CMD = BUFF

This buffer is received by acpi_pcc_opregion_space_handler. This
handler will fetch the complete buffer via internal_pcc_buffer.

The setup handler will receive the special PCC context data which will
contain the PCC channel index which used to set up the channel. The
buffer pointer and length is saved in region context which is then used
in the handler.

(kernel test robot: Build failure with CONFIG_ACPI_DEBUGGER)
Link: https://lore.kernel.org/r/202201041539.feAV0l27-lkp@intel.com
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index b28f8790192a..93eaba2485e3 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1389,6 +1389,12 @@ static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
 }
 #endif
 
+#ifdef CONFIG_ACPI_PCC
+void acpi_init_pcc(void);
+#else
+static inline void acpi_init_pcc(void) { }
+#endif
+
 #ifdef CONFIG_ACPI
 extern void acpi_device_notify(struct device *dev);
 extern void acpi_device_notify_remove(struct device *dev);
-- 
cgit v1.2.3


From 81d3f500ee98cf9f5388f0fb0548ba5a57598827 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 29 Sep 2021 18:17:44 +0900
Subject: scsi: core: Fix scsi_mode_select() interface

The modepage argument is unused. Remove it.

Link: https://lore.kernel.org/r/20210929091744.706003-3-damien.lemoal@wdc.com
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/scsi/scsi_device.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index d1c6fc83b1e3..7571e23d8d8a 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -415,9 +415,8 @@ extern int scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage,
 			   int retries, struct scsi_mode_data *data,
 			   struct scsi_sense_hdr *);
 extern int scsi_mode_select(struct scsi_device *sdev, int pf, int sp,
-			    int modepage, unsigned char *buffer, int len,
-			    int timeout, int retries,
-			    struct scsi_mode_data *data,
+			    unsigned char *buffer, int len, int timeout,
+			    int retries, struct scsi_mode_data *data,
 			    struct scsi_sense_hdr *);
 extern int scsi_test_unit_ready(struct scsi_device *sdev, int timeout,
 				int retries, struct scsi_sense_hdr *sshdr);
-- 
cgit v1.2.3


From 403a2e236538c6b479ea5bfc8b75a75540cfba6b Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Mon, 13 Dec 2021 11:51:34 -0700
Subject: dmaengine: idxd: change MSIX allocation based on per wq activation

Change the driver where WQ interrupt is requested only when wq is being
enabled. This new scheme set things up so that request_threaded_irq() is
only called when a kernel wq type is being enabled. This also sets up for
future interrupt request where different interrupt handler such as wq
occupancy interrupt can be setup instead of the wq completion interrupt.

Not calling request_irq() until the WQ actually needs an irq also prevents
wasting of CPU irq vectors on x86 systems, which is a limited resource.

idxd_flush_pending_descs() is moved to device.c since descriptor flushing
is now part of wq disable rather than shutdown().

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/163942149487.2412839.6691222855803875848.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/uapi/linux/idxd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h
index c750eac09fc9..a8f0ff75c430 100644
--- a/include/uapi/linux/idxd.h
+++ b/include/uapi/linux/idxd.h
@@ -28,6 +28,7 @@ enum idxd_scmd_stat {
 	IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000,
 	IDXD_SCMD_WQ_NO_SIZE = 0x800e0000,
 	IDXD_SCMD_WQ_NO_PRIV = 0x800f0000,
+	IDXD_SCMD_WQ_IRQ_ERR = 0x80100000,
 };
 
 #define IDXD_SCMD_SOFTERR_MASK	0x80000000
-- 
cgit v1.2.3


From c206a389c97c9533971cd05eed69b49f535cc193 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 21 Dec 2021 08:20:31 +0100
Subject: ata: libata: tracepoints for bus-master DMA

Add tracepoints for bus-master DMA and taskfile related functions.
That allows us to drop the relevant DPRINTK() calls.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 include/trace/events/libata.h | 125 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 125 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h
index 2394fc2b2831..acb9a4fc18ed 100644
--- a/include/trace/events/libata.h
+++ b/include/trace/events/libata.h
@@ -151,6 +151,9 @@
 const char *libata_trace_parse_status(struct trace_seq*, unsigned char);
 #define __parse_status(s) libata_trace_parse_status(p, s)
 
+const char *libata_trace_parse_host_stat(struct trace_seq *, unsigned char);
+#define __parse_host_stat(s) libata_trace_parse_host_stat(p, s)
+
 const char *libata_trace_parse_eh_action(struct trace_seq *, unsigned int);
 #define __parse_eh_action(a) libata_trace_parse_eh_action(p, a)
 
@@ -299,6 +302,128 @@ DEFINE_EVENT(ata_qc_complete_template, ata_qc_complete_done,
 	     TP_PROTO(struct ata_queued_cmd *qc),
 	     TP_ARGS(qc));
 
+TRACE_EVENT(ata_tf_load,
+
+	TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf),
+
+	TP_ARGS(ap, tf),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	ata_port )
+		__field( unsigned char,	cmd	)
+		__field( unsigned char,	dev	)
+		__field( unsigned char,	lbal	)
+		__field( unsigned char,	lbam	)
+		__field( unsigned char,	lbah	)
+		__field( unsigned char,	nsect	)
+		__field( unsigned char,	feature	)
+		__field( unsigned char,	hob_lbal )
+		__field( unsigned char,	hob_lbam )
+		__field( unsigned char,	hob_lbah )
+		__field( unsigned char,	hob_nsect )
+		__field( unsigned char,	hob_feature )
+		__field( unsigned char,	proto	)
+	),
+
+	TP_fast_assign(
+		__entry->ata_port	= ap->print_id;
+		__entry->proto		= tf->protocol;
+		__entry->cmd		= tf->command;
+		__entry->dev		= tf->device;
+		__entry->lbal		= tf->lbal;
+		__entry->lbam		= tf->lbam;
+		__entry->lbah		= tf->lbah;
+		__entry->hob_lbal	= tf->hob_lbal;
+		__entry->hob_lbam	= tf->hob_lbam;
+		__entry->hob_lbah	= tf->hob_lbah;
+		__entry->feature	= tf->feature;
+		__entry->hob_feature	= tf->hob_feature;
+		__entry->nsect		= tf->nsect;
+		__entry->hob_nsect	= tf->hob_nsect;
+	),
+
+	TP_printk("ata_port=%u proto=%s cmd=%s%s " \
+		  " tf=(%02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x)",
+		  __entry->ata_port,
+		  show_protocol_name(__entry->proto),
+		  show_opcode_name(__entry->cmd),
+		  __parse_subcmd(__entry->cmd, __entry->feature, __entry->hob_nsect),
+		  __entry->cmd, __entry->feature, __entry->nsect,
+		  __entry->lbal, __entry->lbam, __entry->lbah,
+		  __entry->hob_feature, __entry->hob_nsect,
+		  __entry->hob_lbal, __entry->hob_lbam, __entry->hob_lbah,
+		  __entry->dev)
+);
+
+DECLARE_EVENT_CLASS(ata_exec_command_template,
+
+	TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag),
+
+	TP_ARGS(ap, tf, tag),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	ata_port )
+		__field( unsigned int,	tag	)
+		__field( unsigned char,	cmd	)
+		__field( unsigned char,	feature	)
+		__field( unsigned char,	hob_nsect )
+		__field( unsigned char,	proto	)
+	),
+
+	TP_fast_assign(
+		__entry->ata_port	= ap->print_id;
+		__entry->tag		= tag;
+		__entry->proto		= tf->protocol;
+		__entry->cmd		= tf->command;
+		__entry->feature	= tf->feature;
+		__entry->hob_nsect	= tf->hob_nsect;
+	),
+
+	TP_printk("ata_port=%u tag=%d proto=%s cmd=%s%s",
+		  __entry->ata_port, __entry->tag,
+		  show_protocol_name(__entry->proto),
+		  show_opcode_name(__entry->cmd),
+		  __parse_subcmd(__entry->cmd, __entry->feature, __entry->hob_nsect))
+);
+
+DEFINE_EVENT(ata_exec_command_template, ata_exec_command,
+	     TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag),
+	     TP_ARGS(ap, tf, tag));
+
+DEFINE_EVENT(ata_exec_command_template, ata_bmdma_setup,
+	     TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag),
+	     TP_ARGS(ap, tf, tag));
+
+DEFINE_EVENT(ata_exec_command_template, ata_bmdma_start,
+	     TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag),
+	     TP_ARGS(ap, tf, tag));
+
+DEFINE_EVENT(ata_exec_command_template, ata_bmdma_stop,
+	     TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag),
+	     TP_ARGS(ap, tf, tag));
+
+TRACE_EVENT(ata_bmdma_status,
+
+	TP_PROTO(struct ata_port *ap, unsigned int host_stat),
+
+	TP_ARGS(ap, host_stat),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	ata_port )
+		__field( unsigned int,	tag	)
+		__field( unsigned char,	host_stat )
+	),
+
+	TP_fast_assign(
+		__entry->ata_port	= ap->print_id;
+		__entry->host_stat	= host_stat;
+	),
+
+	TP_printk("ata_port=%u host_stat=%s",
+		  __entry->ata_port,
+		  __parse_host_stat(__entry->host_stat))
+);
+
 TRACE_EVENT(ata_eh_link_autopsy,
 
 	TP_PROTO(struct ata_device *dev, unsigned int eh_action, unsigned int eh_err_mask),
-- 
cgit v1.2.3


From 7fad6ad6a357c73f0bdf55476238ae2884de78a3 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 21 Dec 2021 08:20:32 +0100
Subject: ata: libata-sff: tracepoints for HSM state machine

Add tracepoints for the HSM state machine and drop DPRINTK calls

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 include/trace/events/libata.h | 125 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 125 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h
index acb9a4fc18ed..fcb8fde39614 100644
--- a/include/trace/events/libata.h
+++ b/include/trace/events/libata.h
@@ -148,6 +148,15 @@
 		ata_class_name(ATA_DEV_ZAC_UNSUP),	\
 		ata_class_name(ATA_DEV_NONE))
 
+#define ata_sff_hsm_state_name(state)	{ state, #state }
+#define show_sff_hsm_state_name(val)				\
+    __print_symbolic(val,				\
+		ata_sff_hsm_state_name(HSM_ST_IDLE),	\
+		ata_sff_hsm_state_name(HSM_ST_FIRST),	\
+		ata_sff_hsm_state_name(HSM_ST),		\
+		ata_sff_hsm_state_name(HSM_ST_LAST),	\
+		ata_sff_hsm_state_name(HSM_ST_ERR))
+
 const char *libata_trace_parse_status(struct trace_seq*, unsigned char);
 #define __parse_status(s) libata_trace_parse_status(p, s)
 
@@ -163,6 +172,9 @@ const char *libata_trace_parse_eh_err_mask(struct trace_seq *, unsigned int);
 const char *libata_trace_parse_qc_flags(struct trace_seq *, unsigned int);
 #define __parse_qc_flags(f) libata_trace_parse_qc_flags(p, f)
 
+const char *libata_trace_parse_tf_flags(struct trace_seq *, unsigned int);
+#define __parse_tf_flags(f) libata_trace_parse_tf_flags(p, f)
+
 const char *libata_trace_parse_subcmd(struct trace_seq *, unsigned char,
 				      unsigned char, unsigned char);
 #define __parse_subcmd(c,f,h) libata_trace_parse_subcmd(p, c, f, h)
@@ -558,6 +570,119 @@ DEFINE_EVENT(ata_link_reset_end_template, ata_slave_postreset,
 	     TP_PROTO(struct ata_link *link, unsigned int *class, int rc),
 	     TP_ARGS(link, class, rc));
 
+DECLARE_EVENT_CLASS(ata_sff_hsm_template,
+
+	TP_PROTO(struct ata_queued_cmd *qc, unsigned char status),
+
+	TP_ARGS(qc, status),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	ata_port )
+		__field( unsigned int,	ata_dev	)
+		__field( unsigned int,	tag	)
+		__field( unsigned int,	qc_flags )
+		__field( unsigned int,	protocol )
+		__field( unsigned int,	hsm_state )
+		__field( unsigned char,	dev_state )
+	),
+
+	TP_fast_assign(
+		__entry->ata_port	= qc->ap->print_id;
+		__entry->ata_dev	= qc->dev->link->pmp + qc->dev->devno;
+		__entry->tag		= qc->tag;
+		__entry->qc_flags	= qc->flags;
+		__entry->protocol	= qc->tf.protocol;
+		__entry->hsm_state	= qc->ap->hsm_task_state;
+		__entry->dev_state	= status;
+	),
+
+	TP_printk("ata_port=%u ata_dev=%u tag=%d proto=%s flags=%s task_state=%s dev_stat=0x%X",
+		  __entry->ata_port, __entry->ata_dev, __entry->tag,
+		  show_protocol_name(__entry->protocol),
+		  __parse_qc_flags(__entry->qc_flags),
+		  show_sff_hsm_state_name(__entry->hsm_state),
+		  __entry->dev_state)
+);
+
+DEFINE_EVENT(ata_sff_hsm_template, ata_sff_hsm_state,
+	TP_PROTO(struct ata_queued_cmd *qc, unsigned char state),
+	TP_ARGS(qc, state));
+
+DEFINE_EVENT(ata_sff_hsm_template, ata_sff_hsm_command_complete,
+	TP_PROTO(struct ata_queued_cmd *qc, unsigned char state),
+	TP_ARGS(qc, state));
+
+DEFINE_EVENT(ata_sff_hsm_template, ata_sff_port_intr,
+	TP_PROTO(struct ata_queued_cmd *qc, unsigned char state),
+	TP_ARGS(qc, state));
+
+DECLARE_EVENT_CLASS(ata_transfer_data_template,
+
+	TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count),
+
+	TP_ARGS(qc, offset, count),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	ata_port )
+		__field( unsigned int,	ata_dev	)
+		__field( unsigned int,	tag	)
+		__field( unsigned int,	flags	)
+		__field( unsigned int,	offset	)
+		__field( unsigned int,	bytes	)
+	),
+
+	TP_fast_assign(
+		__entry->ata_port	= qc->ap->print_id;
+		__entry->ata_dev	= qc->dev->link->pmp + qc->dev->devno;
+		__entry->tag		= qc->tag;
+		__entry->flags		= qc->tf.flags;
+		__entry->offset		= offset;
+		__entry->bytes		= count;
+	),
+
+	TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s offset=%u bytes=%u",
+		  __entry->ata_port, __entry->ata_dev, __entry->tag,
+		  __parse_tf_flags(__entry->flags),
+		  __entry->offset, __entry->bytes)
+);
+
+DEFINE_EVENT(ata_transfer_data_template, ata_sff_pio_transfer_data,
+	     TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count),
+	     TP_ARGS(qc, offset, count));
+
+DEFINE_EVENT(ata_transfer_data_template, atapi_pio_transfer_data,
+	     TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count),
+	     TP_ARGS(qc, offset, count));
+
+DEFINE_EVENT(ata_transfer_data_template, atapi_send_cdb,
+	     TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count),
+	     TP_ARGS(qc, offset, count));
+
+DECLARE_EVENT_CLASS(ata_sff_template,
+
+	TP_PROTO(struct ata_port *ap),
+
+	TP_ARGS(ap),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	ata_port )
+		__field( unsigned char,	hsm_state )
+	),
+
+	TP_fast_assign(
+		__entry->ata_port	= ap->print_id;
+		__entry->hsm_state	= ap->hsm_task_state;
+	),
+
+	TP_printk("ata_port=%u task_state=%s",
+		  __entry->ata_port,
+		  show_sff_hsm_state_name(__entry->hsm_state))
+);
+
+DEFINE_EVENT(ata_sff_template, ata_sff_flush_pio_task,
+	     TP_PROTO(struct ata_port *ap),
+	     TP_ARGS(ap));
+
 #endif /*  _TRACE_LIBATA_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From c318458c9359ce3d10943b0c15a9b9f43dda7b2e Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 21 Dec 2021 08:20:34 +0100
Subject: ata: libata: add tracepoints for ATA error handling

Add tracepoints for ATA error handling.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 include/trace/events/libata.h | 60 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h
index fcb8fde39614..d4e631aa976f 100644
--- a/include/trace/events/libata.h
+++ b/include/trace/events/libata.h
@@ -490,6 +490,37 @@ TRACE_EVENT(ata_eh_link_autopsy_qc,
 		  __parse_eh_err_mask(__entry->eh_err_mask))
 );
 
+DECLARE_EVENT_CLASS(ata_eh_action_template,
+
+	TP_PROTO(struct ata_link *link, unsigned int devno, unsigned int eh_action),
+
+	TP_ARGS(link, devno, eh_action),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	ata_port )
+		__field( unsigned int,	ata_dev	)
+		__field( unsigned int,	eh_action )
+	),
+
+	TP_fast_assign(
+		__entry->ata_port	= link->ap->print_id;
+		__entry->ata_dev	= link->pmp + devno;
+		__entry->eh_action	= eh_action;
+	),
+
+	TP_printk("ata_port=%u ata_dev=%u eh_action=%s",
+		  __entry->ata_port, __entry->ata_dev,
+		  __parse_eh_action(__entry->eh_action))
+);
+
+DEFINE_EVENT(ata_eh_action_template, ata_eh_about_to_do,
+	     TP_PROTO(struct ata_link *link, unsigned int devno, unsigned int eh_action),
+	     TP_ARGS(link, devno, eh_action));
+
+DEFINE_EVENT(ata_eh_action_template, ata_eh_done,
+	     TP_PROTO(struct ata_link *link, unsigned int devno, unsigned int eh_action),
+	     TP_ARGS(link, devno, eh_action));
+
 DECLARE_EVENT_CLASS(ata_link_reset_begin_template,
 
 	TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline),
@@ -570,6 +601,35 @@ DEFINE_EVENT(ata_link_reset_end_template, ata_slave_postreset,
 	     TP_PROTO(struct ata_link *link, unsigned int *class, int rc),
 	     TP_ARGS(link, class, rc));
 
+DECLARE_EVENT_CLASS(ata_port_eh_begin_template,
+
+	TP_PROTO(struct ata_port *ap),
+
+	TP_ARGS(ap),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	ata_port )
+	),
+
+	TP_fast_assign(
+		__entry->ata_port	= ap->print_id;
+	),
+
+	TP_printk("ata_port=%u", __entry->ata_port)
+);
+
+DEFINE_EVENT(ata_port_eh_begin_template, ata_std_sched_eh,
+	     TP_PROTO(struct ata_port *ap),
+	     TP_ARGS(ap));
+
+DEFINE_EVENT(ata_port_eh_begin_template, ata_port_freeze,
+	     TP_PROTO(struct ata_port *ap),
+	     TP_ARGS(ap));
+
+DEFINE_EVENT(ata_port_eh_begin_template, ata_port_thaw,
+	     TP_PROTO(struct ata_port *ap),
+	     TP_ARGS(ap));
+
 DECLARE_EVENT_CLASS(ata_sff_hsm_template,
 
 	TP_PROTO(struct ata_queued_cmd *qc, unsigned char status),
-- 
cgit v1.2.3


From 742bef476ca5352b16063161fb73a56629a6d995 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 21 Dec 2021 08:20:35 +0100
Subject: ata: libata: move ata_{port,link,dev}_dbg to standard pr_XXX() macros

Use standard pr_{debug,info,notice,warn,err} macros instead of the
hand-crafted printk helpers.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 include/linux/libata.h | 69 +++++++++++++++++++++++++++-----------------------
 1 file changed, 38 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 235fdbeb19ea..39cdde0b9491 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1489,51 +1489,61 @@ static inline int sata_srst_pmp(struct ata_link *link)
 	return link->pmp;
 }
 
-/*
- * printk helpers
- */
-__printf(3, 4)
-void ata_port_printk(const struct ata_port *ap, const char *level,
-		     const char *fmt, ...);
-__printf(3, 4)
-void ata_link_printk(const struct ata_link *link, const char *level,
-		     const char *fmt, ...);
-__printf(3, 4)
-void ata_dev_printk(const struct ata_device *dev, const char *level,
-		    const char *fmt, ...);
+#define ata_port_printk(level, ap, fmt, ...)			\
+	pr_ ## level ("ata%u: " fmt, (ap)->print_id, ##__VA_ARGS__)
 
 #define ata_port_err(ap, fmt, ...)				\
-	ata_port_printk(ap, KERN_ERR, fmt, ##__VA_ARGS__)
+	ata_port_printk(err, ap, fmt, ##__VA_ARGS__)
 #define ata_port_warn(ap, fmt, ...)				\
-	ata_port_printk(ap, KERN_WARNING, fmt, ##__VA_ARGS__)
+	ata_port_printk(warn, ap, fmt, ##__VA_ARGS__)
 #define ata_port_notice(ap, fmt, ...)				\
-	ata_port_printk(ap, KERN_NOTICE, fmt, ##__VA_ARGS__)
+	ata_port_printk(notice, ap, fmt, ##__VA_ARGS__)
 #define ata_port_info(ap, fmt, ...)				\
-	ata_port_printk(ap, KERN_INFO, fmt, ##__VA_ARGS__)
+	ata_port_printk(info, ap, fmt, ##__VA_ARGS__)
 #define ata_port_dbg(ap, fmt, ...)				\
-	ata_port_printk(ap, KERN_DEBUG, fmt, ##__VA_ARGS__)
+	ata_port_printk(debug, ap, fmt, ##__VA_ARGS__)
+
+#define ata_link_printk(level, link, fmt, ...)			\
+do {								\
+	if (sata_pmp_attached((link)->ap) ||			\
+	    (link)->ap->slave_link)				\
+		pr_ ## level ("ata%u.%02u: " fmt,		\
+			      (link)->ap->print_id,		\
+			      (link)->pmp,			\
+			      ##__VA_ARGS__);			\
+        else							\
+		pr_ ## level ("ata%u: " fmt,			\
+			      (link)->ap->print_id,		\
+			      ##__VA_ARGS__);			\
+} while (0)
 
 #define ata_link_err(link, fmt, ...)				\
-	ata_link_printk(link, KERN_ERR, fmt, ##__VA_ARGS__)
+	ata_link_printk(err, link, fmt, ##__VA_ARGS__)
 #define ata_link_warn(link, fmt, ...)				\
-	ata_link_printk(link, KERN_WARNING, fmt, ##__VA_ARGS__)
+	ata_link_printk(warn, link, fmt, ##__VA_ARGS__)
 #define ata_link_notice(link, fmt, ...)				\
-	ata_link_printk(link, KERN_NOTICE, fmt, ##__VA_ARGS__)
+	ata_link_printk(notice, link, fmt, ##__VA_ARGS__)
 #define ata_link_info(link, fmt, ...)				\
-	ata_link_printk(link, KERN_INFO, fmt, ##__VA_ARGS__)
+	ata_link_printk(info, link, fmt, ##__VA_ARGS__)
 #define ata_link_dbg(link, fmt, ...)				\
-	ata_link_printk(link, KERN_DEBUG, fmt, ##__VA_ARGS__)
+	ata_link_printk(debug, link, fmt, ##__VA_ARGS__)
+
+#define ata_dev_printk(level, dev, fmt, ...)			\
+        pr_ ## level("ata%u.%02u: " fmt,			\
+               (dev)->link->ap->print_id,			\
+	       (dev)->link->pmp + (dev)->devno,			\
+	       ##__VA_ARGS__)
 
 #define ata_dev_err(dev, fmt, ...)				\
-	ata_dev_printk(dev, KERN_ERR, fmt, ##__VA_ARGS__)
+	ata_dev_printk(err, dev, fmt, ##__VA_ARGS__)
 #define ata_dev_warn(dev, fmt, ...)				\
-	ata_dev_printk(dev, KERN_WARNING, fmt, ##__VA_ARGS__)
+	ata_dev_printk(warn, dev, fmt, ##__VA_ARGS__)
 #define ata_dev_notice(dev, fmt, ...)				\
-	ata_dev_printk(dev, KERN_NOTICE, fmt, ##__VA_ARGS__)
+	ata_dev_printk(notice, dev, fmt, ##__VA_ARGS__)
 #define ata_dev_info(dev, fmt, ...)				\
-	ata_dev_printk(dev, KERN_INFO, fmt, ##__VA_ARGS__)
+	ata_dev_printk(info, dev, fmt, ##__VA_ARGS__)
 #define ata_dev_dbg(dev, fmt, ...)				\
-	ata_dev_printk(dev, KERN_DEBUG, fmt, ##__VA_ARGS__)
+	ata_dev_printk(debug, dev, fmt, ##__VA_ARGS__)
 
 void ata_print_version(const struct device *dev, const char *version);
 
@@ -2067,11 +2077,8 @@ static inline u8 ata_wait_idle(struct ata_port *ap)
 {
 	u8 status = ata_sff_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000);
 
-#ifdef ATA_DEBUG
 	if (status != 0xff && (status & (ATA_BUSY | ATA_DRQ)))
-		ata_port_printk(ap, KERN_DEBUG, "abnormal Status 0x%X\n",
-				status);
-#endif
+		ata_port_dbg(ap, "abnormal Status 0x%X\n", status);
 
 	return status;
 }
-- 
cgit v1.2.3


From d97c75edd806669c9f4b56c0ddae37725c0b708c Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 21 Dec 2021 08:21:01 +0100
Subject: ata: libata: drop ata_msg_error() and ata_msg_intr()

Unused.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 include/linux/libata.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 39cdde0b9491..4f0a85f4e69a 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -78,8 +78,6 @@ enum {
 	ATA_MSG_WARN	= 0x0008,
 	ATA_MSG_MALLOC	= 0x0010,
 	ATA_MSG_CTL	= 0x0020,
-	ATA_MSG_INTR	= 0x0040,
-	ATA_MSG_ERR	= 0x0080,
 };
 
 #define ata_msg_drv(p)    ((p)->msg_enable & ATA_MSG_DRV)
@@ -88,8 +86,6 @@ enum {
 #define ata_msg_warn(p)   ((p)->msg_enable & ATA_MSG_WARN)
 #define ata_msg_malloc(p) ((p)->msg_enable & ATA_MSG_MALLOC)
 #define ata_msg_ctl(p)    ((p)->msg_enable & ATA_MSG_CTL)
-#define ata_msg_intr(p)   ((p)->msg_enable & ATA_MSG_INTR)
-#define ata_msg_err(p)    ((p)->msg_enable & ATA_MSG_ERR)
 
 static inline u32 ata_msg_init(int dval, int default_msg_enable_bits)
 {
-- 
cgit v1.2.3


From 5cef96b4207e01c9cdb7752acaa178056fe94632 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 21 Dec 2021 08:21:02 +0100
Subject: ata: libata: drop ata_msg_ctl()

The one caller have been converted to dynamic debugging.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 include/linux/libata.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 4f0a85f4e69a..e384cce62963 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -77,7 +77,6 @@ enum {
 	ATA_MSG_PROBE	= 0x0004,
 	ATA_MSG_WARN	= 0x0008,
 	ATA_MSG_MALLOC	= 0x0010,
-	ATA_MSG_CTL	= 0x0020,
 };
 
 #define ata_msg_drv(p)    ((p)->msg_enable & ATA_MSG_DRV)
@@ -85,7 +84,6 @@ enum {
 #define ata_msg_probe(p)  ((p)->msg_enable & ATA_MSG_PROBE)
 #define ata_msg_warn(p)   ((p)->msg_enable & ATA_MSG_WARN)
 #define ata_msg_malloc(p) ((p)->msg_enable & ATA_MSG_MALLOC)
-#define ata_msg_ctl(p)    ((p)->msg_enable & ATA_MSG_CTL)
 
 static inline u32 ata_msg_init(int dval, int default_msg_enable_bits)
 {
-- 
cgit v1.2.3


From 2f784b923d50cdef1f6bd24d7c18614321b0833a Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 21 Dec 2021 08:21:03 +0100
Subject: ata: libata: drop ata_msg_malloc()

Unused.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 include/linux/libata.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index e384cce62963..5651bbf4902b 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -76,14 +76,12 @@ enum {
 	ATA_MSG_INFO	= 0x0002,
 	ATA_MSG_PROBE	= 0x0004,
 	ATA_MSG_WARN	= 0x0008,
-	ATA_MSG_MALLOC	= 0x0010,
 };
 
 #define ata_msg_drv(p)    ((p)->msg_enable & ATA_MSG_DRV)
 #define ata_msg_info(p)   ((p)->msg_enable & ATA_MSG_INFO)
 #define ata_msg_probe(p)  ((p)->msg_enable & ATA_MSG_PROBE)
 #define ata_msg_warn(p)   ((p)->msg_enable & ATA_MSG_WARN)
-#define ata_msg_malloc(p) ((p)->msg_enable & ATA_MSG_MALLOC)
 
 static inline u32 ata_msg_init(int dval, int default_msg_enable_bits)
 {
-- 
cgit v1.2.3


From 16d424672716dc886fb58ec4a47a408db4781cc0 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 21 Dec 2021 08:21:04 +0100
Subject: ata: libata: drop ata_msg_warn()

The WARN level was always enabled, so drop ata_msg_warn().

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 include/linux/libata.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 5651bbf4902b..0e5ed2ff94be 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -75,13 +75,11 @@ enum {
 	ATA_MSG_DRV	= 0x0001,
 	ATA_MSG_INFO	= 0x0002,
 	ATA_MSG_PROBE	= 0x0004,
-	ATA_MSG_WARN	= 0x0008,
 };
 
 #define ata_msg_drv(p)    ((p)->msg_enable & ATA_MSG_DRV)
 #define ata_msg_info(p)   ((p)->msg_enable & ATA_MSG_INFO)
 #define ata_msg_probe(p)  ((p)->msg_enable & ATA_MSG_PROBE)
-#define ata_msg_warn(p)   ((p)->msg_enable & ATA_MSG_WARN)
 
 static inline u32 ata_msg_init(int dval, int default_msg_enable_bits)
 {
-- 
cgit v1.2.3


From 17a1e1be2fc7dc99945b41df0485037dcb6044d0 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 21 Dec 2021 08:21:05 +0100
Subject: ata: libata: drop ata_msg_probe()

All callsites have been converted to dynamic debugging.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 include/linux/libata.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 0e5ed2ff94be..455d7e77e562 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -74,12 +74,10 @@
 enum {
 	ATA_MSG_DRV	= 0x0001,
 	ATA_MSG_INFO	= 0x0002,
-	ATA_MSG_PROBE	= 0x0004,
 };
 
 #define ata_msg_drv(p)    ((p)->msg_enable & ATA_MSG_DRV)
 #define ata_msg_info(p)   ((p)->msg_enable & ATA_MSG_INFO)
-#define ata_msg_probe(p)  ((p)->msg_enable & ATA_MSG_PROBE)
 
 static inline u32 ata_msg_init(int dval, int default_msg_enable_bits)
 {
-- 
cgit v1.2.3


From 96c810f216cb6da15bfa8fe8ef3bf73ca91c5dd8 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 21 Dec 2021 08:21:06 +0100
Subject: ata: libata: drop ata_msg_info()

Convert the sole caller to ata_dev_dbg() and remove the definition.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 include/linux/libata.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 455d7e77e562..524d09b1dc82 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -73,11 +73,9 @@
 
 enum {
 	ATA_MSG_DRV	= 0x0001,
-	ATA_MSG_INFO	= 0x0002,
 };
 
 #define ata_msg_drv(p)    ((p)->msg_enable & ATA_MSG_DRV)
-#define ata_msg_info(p)   ((p)->msg_enable & ATA_MSG_INFO)
 
 static inline u32 ata_msg_init(int dval, int default_msg_enable_bits)
 {
-- 
cgit v1.2.3


From 1c95a27c1e544f723f6e0e5a4384098f92996ec0 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 21 Dec 2021 08:21:07 +0100
Subject: ata: libata: drop ata_msg_drv()

Callers are already protected by ata_dev_print_info(), so no need
to have an additional configuration parameter here.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 include/linux/libata.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 524d09b1dc82..65172609a005 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -71,12 +71,6 @@
 /* NEW: debug levels */
 #define HAVE_LIBATA_MSG 1
 
-enum {
-	ATA_MSG_DRV	= 0x0001,
-};
-
-#define ata_msg_drv(p)    ((p)->msg_enable & ATA_MSG_DRV)
-
 static inline u32 ata_msg_init(int dval, int default_msg_enable_bits)
 {
 	if (dval < 0 || dval >= (sizeof(u32) * 8))
-- 
cgit v1.2.3


From db45905e74e6ae035305719bc683eca40f526669 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 21 Dec 2021 08:21:08 +0100
Subject: ata: libata: remove 'new' ata message handling

Remove the remaining bits for the 'new' ata message handling.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 include/linux/libata.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 65172609a005..145c0132b75e 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -68,18 +68,6 @@
 	}							\
 })
 
-/* NEW: debug levels */
-#define HAVE_LIBATA_MSG 1
-
-static inline u32 ata_msg_init(int dval, int default_msg_enable_bits)
-{
-	if (dval < 0 || dval >= (sizeof(u32) * 8))
-		return default_msg_enable_bits; /* should be 0x1 - only driver info msgs */
-	if (!dval)
-		return 0;
-	return (1 << dval) - 1;
-}
-
 /* defines only for the constants which don't work well as enums */
 #define ATA_TAG_POISON		0xfafbfcfdU
 
@@ -864,7 +852,6 @@ struct ata_port {
 
 	unsigned int		hsm_task_state;
 
-	u32			msg_enable;
 	struct list_head	eh_done_q;
 	wait_queue_head_t	eh_wait_q;
 	int			eh_tries;
-- 
cgit v1.2.3


From 870bb833c0acb29d8471eac5c2d2e6274826dbb6 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 21 Dec 2021 08:21:09 +0100
Subject: ata: libata: remove debug compilation switches

Unused now, so remove and drop any references to them.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 include/linux/libata.h | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 145c0132b75e..c258f69106f4 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -39,25 +39,9 @@
  * compile-time options: to be removed as soon as all the drivers are
  * converted to the new debugging mechanism
  */
-#undef ATA_DEBUG		/* debugging output */
-#undef ATA_VERBOSE_DEBUG	/* yet more debugging output */
 #undef ATA_IRQ_TRAP		/* define to ack screaming irqs */
-#undef ATA_NDEBUG		/* define to disable quick runtime checks */
 
 
-/* note: prints function name for you */
-#ifdef ATA_DEBUG
-#define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
-#ifdef ATA_VERBOSE_DEBUG
-#define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
-#else
-#define VPRINTK(fmt, args...)
-#endif	/* ATA_VERBOSE_DEBUG */
-#else
-#define DPRINTK(fmt, args...)
-#define VPRINTK(fmt, args...)
-#endif	/* ATA_DEBUG */
-
 #define ata_print_version_once(dev, version)			\
 ({								\
 	static bool __print_once;				\
-- 
cgit v1.2.3


From cc4b08c31b5c51352f258032cc65e884b3e61e6a Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Tue, 7 Dec 2021 21:15:31 +0900
Subject: can: do not increase tx_bytes statistics for RTR frames
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The actual payload length of the CAN Remote Transmission Request (RTR)
frames is always 0, i.e. no payload is transmitted on the wire.
However, those RTR frames still use the DLC to indicate the length of
the requested frame.

As such, net_device_stats::tx_bytes should not be increased when
sending RTR frames.

The function can_get_echo_skb() already returns the correct length,
even for RTR frames (c.f. [1]). However, for historical reasons, the
drivers do not use can_get_echo_skb()'s return value and instead, most
of them store a temporary length (or dlc) in some local structure or
array. Using the return value of can_get_echo_skb() solves the
issue. After doing this, such length/dlc fields become unused and so
this patch does the adequate cleaning when needed.

This patch fixes all the CAN drivers.

Finally, can_get_echo_skb() is decorated with the __must_check
attribute in order to force future drivers to correctly use its return
value (else the compiler would emit a warning).

[1] commit ed3320cec279 ("can: dev: __can_get_echo_skb():
fix real payload length return value for RTR frames")

Link: https://lore.kernel.org/all/20211207121531.42941-6-mailhol.vincent@wanadoo.fr
Cc: Nicolas Ferre <nicolas.ferre@microchip.com>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Ludovic Desroches <ludovic.desroches@microchip.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Chen-Yu Tsai <wens@csie.org>
Cc: Jernej Skrabec <jernej.skrabec@gmail.com>
Cc: Yasushi SHOJI <yashi@spacecubics.com>
Cc: Oliver Hartkopp <socketcan@hartkopp.net>
Cc: Stephane Grosjean <s.grosjean@peak-system.com>
Cc: Andreas Larsson <andreas@gaisler.com>
Tested-by: Jimmy Assarsson <extja@kvaser.com> # kvaser
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Acked-by: Stefan Mätje <stefan.maetje@esd.eu> # esd_usb2
Tested-by: Stefan Mätje <stefan.maetje@esd.eu> # esd_usb2
[mkl: add conversion for grcan]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/skb.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
index d311bc369a39..fdb22b00674a 100644
--- a/include/linux/can/skb.h
+++ b/include/linux/can/skb.h
@@ -21,8 +21,9 @@ int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
 		     unsigned int idx, unsigned int frame_len);
 struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx,
 				   u8 *len_ptr, unsigned int *frame_len_ptr);
-unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx,
-			      unsigned int *frame_len_ptr);
+unsigned int __must_check can_get_echo_skb(struct net_device *dev,
+					   unsigned int idx,
+					   unsigned int *frame_len_ptr);
 void can_free_echo_skb(struct net_device *dev, unsigned int idx,
 		       unsigned int *frame_len_ptr);
 struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf);
-- 
cgit v1.2.3


From c9e1d8ed304cc6106c3241add170193995953325 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Tue, 14 Dec 2021 01:02:23 +0900
Subject: can: dev: replace can_priv::ctrlmode_static by
 can_get_static_ctrlmode()

The statically enabled features of a CAN controller can be retrieved
using below formula:

| u32 ctrlmode_static = priv->ctrlmode & ~priv->ctrlmode_supported;

As such, there is no need to store this information. This patch remove
the field ctrlmode_static of struct can_priv and provides, in
replacement, the inline function can_get_static_ctrlmode() which
returns the same value.

Link: https://lore.kernel.org/all/20211213160226.56219-2-mailhol.vincent@wanadoo.fr
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/dev.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 45f19d9db5ca..92e2d69462f0 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -69,7 +69,6 @@ struct can_priv {
 	/* CAN controller features - see include/uapi/linux/can/netlink.h */
 	u32 ctrlmode;		/* current options setting */
 	u32 ctrlmode_supported;	/* options that can be modified by netlink */
-	u32 ctrlmode_static;	/* static enabled options for driver/hardware */
 
 	int restart_ms;
 	struct delayed_work restart_work;
@@ -139,13 +138,17 @@ static inline void can_set_static_ctrlmode(struct net_device *dev,
 
 	/* alloc_candev() succeeded => netdev_priv() is valid at this point */
 	priv->ctrlmode = static_mode;
-	priv->ctrlmode_static = static_mode;
 
 	/* override MTU which was set by default in can_setup()? */
 	if (static_mode & CAN_CTRLMODE_FD)
 		dev->mtu = CANFD_MTU;
 }
 
+static inline u32 can_get_static_ctrlmode(struct can_priv *priv)
+{
+	return priv->ctrlmode & ~priv->ctrlmode_supported;
+}
+
 void can_setup(struct net_device *dev);
 
 struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
-- 
cgit v1.2.3


From 7d4a101c0bd3c6e5c6e45c705a54f7bc8f6c128d Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Tue, 14 Dec 2021 01:02:24 +0900
Subject: can: dev: add sanity check in can_set_static_ctrlmode()

Previous patch removed can_priv::ctrlmode_static to replace it with
can_get_static_ctrlmode().

A condition sine qua non for this to work is that the controller
static modes should never be set in can_priv::ctrlmode_supported
(c.f. the comment on can_priv::ctrlmode_supported which states that it
is for "options that can be *modified* by netlink"). Also, this
condition is already correctly fulfilled by all existing drivers
which rely on the ctrlmode_static feature.

Nonetheless, we added an extra safeguard in can_set_static_ctrlmode()
to return an error value and to warn the developer who would be
adventurous enough to set to static a given feature that is already
set to supported.

The drivers which rely on the static controller mode are then updated
to check the return value of can_set_static_ctrlmode().

Link: https://lore.kernel.org/all/20211213160226.56219-3-mailhol.vincent@wanadoo.fr
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/dev.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 92e2d69462f0..fff3f70df697 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -131,17 +131,24 @@ static inline s32 can_get_relative_tdco(const struct can_priv *priv)
 }
 
 /* helper to define static CAN controller features at device creation time */
-static inline void can_set_static_ctrlmode(struct net_device *dev,
-					   u32 static_mode)
+static inline int __must_check can_set_static_ctrlmode(struct net_device *dev,
+						       u32 static_mode)
 {
 	struct can_priv *priv = netdev_priv(dev);
 
 	/* alloc_candev() succeeded => netdev_priv() is valid at this point */
+	if (priv->ctrlmode_supported & static_mode) {
+		netdev_warn(dev,
+			    "Controller features can not be supported and static at the same time\n");
+		return -EINVAL;
+	}
 	priv->ctrlmode = static_mode;
 
 	/* override MTU which was set by default in can_setup()? */
 	if (static_mode & CAN_CTRLMODE_FD)
 		dev->mtu = CANFD_MTU;
+
+	return 0;
 }
 
 static inline u32 can_get_static_ctrlmode(struct can_priv *priv)
-- 
cgit v1.2.3


From 5fe1be81efd28bf2afcac218f23118dca6b1b648 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Tue, 14 Dec 2021 01:02:25 +0900
Subject: can: dev: reorder struct can_priv members for better packing

Save eight bytes of holes on x86-64 architectures by reordering the
members of struct can_priv.

Before:

| $ pahole -C can_priv drivers/net/can/dev/dev.o
| struct can_priv {
| 	struct net_device *        dev;                  /*     0     8 */
| 	struct can_device_stats    can_stats;            /*     8    24 */
| 	const struct can_bittiming_const  * bittiming_const; /*    32     8 */
| 	const struct can_bittiming_const  * data_bittiming_const; /*    40     8 */
| 	struct can_bittiming       bittiming;            /*    48    32 */
| 	/* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
| 	struct can_bittiming       data_bittiming;       /*    80    32 */
| 	const struct can_tdc_const  * tdc_const;         /*   112     8 */
| 	struct can_tdc             tdc;                  /*   120    12 */
| 	/* --- cacheline 2 boundary (128 bytes) was 4 bytes ago --- */
| 	unsigned int               bitrate_const_cnt;    /*   132     4 */
| 	const u32  *               bitrate_const;        /*   136     8 */
| 	const u32  *               data_bitrate_const;   /*   144     8 */
| 	unsigned int               data_bitrate_const_cnt; /*   152     4 */
| 	u32                        bitrate_max;          /*   156     4 */
| 	struct can_clock           clock;                /*   160     4 */
| 	unsigned int               termination_const_cnt; /*   164     4 */
| 	const u16  *               termination_const;    /*   168     8 */
| 	u16                        termination;          /*   176     2 */
|
| 	/* XXX 6 bytes hole, try to pack */
|
| 	struct gpio_desc *         termination_gpio;     /*   184     8 */
| 	/* --- cacheline 3 boundary (192 bytes) --- */
| 	u16                        termination_gpio_ohms[2]; /*   192     4 */
| 	enum can_state             state;                /*   196     4 */
| 	u32                        ctrlmode;             /*   200     4 */
| 	u32                        ctrlmode_supported;   /*   204     4 */
| 	int                        restart_ms;           /*   208     4 */
|
| 	/* XXX 4 bytes hole, try to pack */
|
| 	struct delayed_work        restart_work;         /*   216    88 */
|
| 	/* XXX last struct has 4 bytes of padding */
|
| 	/* --- cacheline 4 boundary (256 bytes) was 48 bytes ago --- */
| 	int                        (*do_set_bittiming)(struct net_device *); /*   304     8 */
| 	int                        (*do_set_data_bittiming)(struct net_device *); /*   312     8 */
| 	/* --- cacheline 5 boundary (320 bytes) --- */
| 	int                        (*do_set_mode)(struct net_device *, enum can_mode); /*   320     8 */
| 	int                        (*do_set_termination)(struct net_device *, u16); /*   328     8 */
| 	int                        (*do_get_state)(const struct net_device  *, enum can_state *); /*   336     8 */
| 	int                        (*do_get_berr_counter)(const struct net_device  *, struct can_berr_counter *); /*   344     8 */
| 	unsigned int               echo_skb_max;         /*   352     4 */
|
| 	/* XXX 4 bytes hole, try to pack */
|
| 	struct sk_buff * *         echo_skb;             /*   360     8 */
|
| 	/* size: 368, cachelines: 6, members: 32 */
| 	/* sum members: 354, holes: 3, sum holes: 14 */
| 	/* paddings: 1, sum paddings: 4 */
| 	/* last cacheline: 48 bytes */
| };

After:

| $ pahole -C can_priv drivers/net/can/dev/dev.o
| struct can_priv {
| 	struct net_device *        dev;                  /*     0     8 */
| 	struct can_device_stats    can_stats;            /*     8    24 */
| 	const struct can_bittiming_const  * bittiming_const; /*    32     8 */
| 	const struct can_bittiming_const  * data_bittiming_const; /*    40     8 */
| 	struct can_bittiming       bittiming;            /*    48    32 */
| 	/* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
| 	struct can_bittiming       data_bittiming;       /*    80    32 */
| 	const struct can_tdc_const  * tdc_const;         /*   112     8 */
| 	struct can_tdc             tdc;                  /*   120    12 */
| 	/* --- cacheline 2 boundary (128 bytes) was 4 bytes ago --- */
| 	unsigned int               bitrate_const_cnt;    /*   132     4 */
| 	const u32  *               bitrate_const;        /*   136     8 */
| 	const u32  *               data_bitrate_const;   /*   144     8 */
| 	unsigned int               data_bitrate_const_cnt; /*   152     4 */
| 	u32                        bitrate_max;          /*   156     4 */
| 	struct can_clock           clock;                /*   160     4 */
| 	unsigned int               termination_const_cnt; /*   164     4 */
| 	const u16  *               termination_const;    /*   168     8 */
| 	u16                        termination;          /*   176     2 */
|
| 	/* XXX 6 bytes hole, try to pack */
|
| 	struct gpio_desc *         termination_gpio;     /*   184     8 */
| 	/* --- cacheline 3 boundary (192 bytes) --- */
| 	u16                        termination_gpio_ohms[2]; /*   192     4 */
| 	unsigned int               echo_skb_max;         /*   196     4 */
| 	struct sk_buff * *         echo_skb;             /*   200     8 */
| 	enum can_state             state;                /*   208     4 */
| 	u32                        ctrlmode;             /*   212     4 */
| 	u32                        ctrlmode_supported;   /*   216     4 */
| 	int                        restart_ms;           /*   220     4 */
| 	struct delayed_work        restart_work;         /*   224    88 */
|
| 	/* XXX last struct has 4 bytes of padding */
|
| 	/* --- cacheline 4 boundary (256 bytes) was 56 bytes ago --- */
| 	int                        (*do_set_bittiming)(struct net_device *); /*   312     8 */
| 	/* --- cacheline 5 boundary (320 bytes) --- */
| 	int                        (*do_set_data_bittiming)(struct net_device *); /*   320     8 */
| 	int                        (*do_set_mode)(struct net_device *, enum can_mode); /*   328     8 */
| 	int                        (*do_set_termination)(struct net_device *, u16); /*   336     8 */
| 	int                        (*do_get_state)(const struct net_device  *, enum can_state *); /*   344     8 */
| 	int                        (*do_get_berr_counter)(const struct net_device  *, struct can_berr_counter *); /*   352     8 */
|
| 	/* size: 360, cachelines: 6, members: 32 */
| 	/* sum members: 354, holes: 1, sum holes: 6 */
| 	/* paddings: 1, sum paddings: 4 */
| 	/* last cacheline: 40 bytes */
| };

Link: https://lore.kernel.org/all/20211213160226.56219-4-mailhol.vincent@wanadoo.fr
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/dev.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index fff3f70df697..c2ea47f30046 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -64,6 +64,9 @@ struct can_priv {
 	struct gpio_desc *termination_gpio;
 	u16 termination_gpio_ohms[CAN_TERMINATION_GPIO_MAX];
 
+	unsigned int echo_skb_max;
+	struct sk_buff **echo_skb;
+
 	enum can_state state;
 
 	/* CAN controller features - see include/uapi/linux/can/netlink.h */
@@ -83,9 +86,6 @@ struct can_priv {
 				   struct can_berr_counter *bec);
 	int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv);
 
-	unsigned int echo_skb_max;
-	struct sk_buff **echo_skb;
-
 #ifdef CONFIG_CAN_LEDS
 	struct led_trigger *tx_led_trig;
 	char tx_led_trig_name[CAN_LED_NAME_SZ];
-- 
cgit v1.2.3


From 383f0993fc77152b0773c85ed69d6734baf9cb48 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Tue, 14 Dec 2021 01:02:26 +0900
Subject: can: netlink: report the CAN controller mode supported flags

Currently, the CAN netlink interface provides no easy ways to check
the capabilities of a given controller. The only method from the
command line is to try each CAN_CTRLMODE_* individually to check
whether the netlink interface returns an -EOPNOTSUPP error or not
(alternatively, one may find it easier to directly check the source
code of the driver instead...)

This patch introduces a method for the user to check both the
supported and the static capabilities. The proposed method introduces
a new IFLA nest: IFLA_CAN_CTRLMODE_EXT which extends the current
IFLA_CAN_CTRLMODE. This is done to guaranty a full forward and
backward compatibility between the kernel and the user land
applications.

The IFLA_CAN_CTRLMODE_EXT nest contains one single entry:
IFLA_CAN_CTRLMODE_SUPPORTED. Because this entry is only used in one
direction: kernel to userland, no new struct nla_policy are
introduced.

Below table explains how IFLA_CAN_CTRLMODE_SUPPORTED (hereafter:
"supported") and can_ctrlmode::flags (hereafter: "flags") allow us to
identify both the supported and the static capabilities, when masked
with any of the CAN_CTRLMODE_* bit flags:

 supported &	flags &		Controller capabilities
 CAN_CTRLMODE_*	CAN_CTRLMODE_*
 -----------------------------------------------------------------------
 false		false		Feature not supported (always disabled)
 false		true		Static feature (always enabled)
 true		false		Feature supported but disabled
 true		true		Feature supported and enabled

Link: https://lore.kernel.org/all/20211213160226.56219-5-mailhol.vincent@wanadoo.fr
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/netlink.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h
index 75b85c60efb2..02ec32d69474 100644
--- a/include/uapi/linux/can/netlink.h
+++ b/include/uapi/linux/can/netlink.h
@@ -137,6 +137,7 @@ enum {
 	IFLA_CAN_DATA_BITRATE_CONST,
 	IFLA_CAN_BITRATE_MAX,
 	IFLA_CAN_TDC,
+	IFLA_CAN_CTRLMODE_EXT,
 
 	/* add new constants above here */
 	__IFLA_CAN_MAX,
@@ -166,6 +167,18 @@ enum {
 	IFLA_CAN_TDC_MAX = __IFLA_CAN_TDC - 1
 };
 
+/*
+ * IFLA_CAN_CTRLMODE_EXT nest: controller mode extended parameters
+ */
+enum {
+	IFLA_CAN_CTRLMODE_UNSPEC,
+	IFLA_CAN_CTRLMODE_SUPPORTED,	/* u32 */
+
+	/* add new constants above here */
+	__IFLA_CAN_CTRLMODE,
+	IFLA_CAN_CTRLMODE_MAX = __IFLA_CAN_CTRLMODE - 1
+};
+
 /* u16 termination range: 1..65535 Ohms */
 #define CAN_TERMINATION_DISABLED 0
 
-- 
cgit v1.2.3


From c6af53f038aa32cec12e8a305ba07c7ef168f1b0 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Tue, 4 Jan 2022 12:07:00 +0000
Subject: net: mdio: add helpers to extract clause 45 regad and devad fields

Add a couple of helpers and definitions to extract the clause 45 regad
and devad fields from the regnum passed into MDIO drivers.

Tested-by: Daniel Golle <daniel@makrotopia.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 9f3587a61e14..ecac96d52e01 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -7,6 +7,7 @@
 #define __LINUX_MDIO_H__
 
 #include <uapi/linux/mdio.h>
+#include <linux/bitfield.h>
 #include <linux/mod_devicetable.h>
 
 /* Or MII_ADDR_C45 into regnum for read/write on mii_bus to enable the 21 bit
@@ -14,6 +15,7 @@
  */
 #define MII_ADDR_C45		(1<<30)
 #define MII_DEVADDR_C45_SHIFT	16
+#define MII_DEVADDR_C45_MASK	GENMASK(20, 16)
 #define MII_REGADDR_C45_MASK	GENMASK(15, 0)
 
 struct gpio_desc;
@@ -381,6 +383,16 @@ static inline u32 mdiobus_c45_addr(int devad, u16 regnum)
 	return MII_ADDR_C45 | devad << MII_DEVADDR_C45_SHIFT | regnum;
 }
 
+static inline u16 mdiobus_c45_regad(u32 regnum)
+{
+	return FIELD_GET(MII_REGADDR_C45_MASK, regnum);
+}
+
+static inline u16 mdiobus_c45_devad(u32 regnum)
+{
+	return FIELD_GET(MII_DEVADDR_C45_MASK, regnum);
+}
+
 static inline int __mdiobus_c45_read(struct mii_bus *bus, int prtad, int devad,
 				     u16 regnum)
 {
-- 
cgit v1.2.3


From b08db33dabd18c5bd9a419b46302c186801eab10 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 5 Jan 2022 15:21:35 +0200
Subject: net: dsa: move dsa_port :: stp_state near dsa_port :: mac

The MAC address of a port is 6 octets in size, and this creates a 2
octet hole after it. There are some other u8 members of struct dsa_port
that we can put in that hole. One such member is the stp_state.

Before:

pahole -C dsa_port net/dsa/slave.o
struct dsa_port {
        union {
                struct net_device * master;              /*     0     8 */
                struct net_device * slave;               /*     0     8 */
        };                                               /*     0     8 */
        const struct dsa_device_ops  * tag_ops;          /*     8     8 */
        struct dsa_switch_tree *   dst;                  /*    16     8 */
        struct sk_buff *           (*rcv)(struct sk_buff *, struct net_device *); /*    24     8 */
        enum {
                DSA_PORT_TYPE_UNUSED = 0,
                DSA_PORT_TYPE_CPU    = 1,
                DSA_PORT_TYPE_DSA    = 2,
                DSA_PORT_TYPE_USER   = 3,
        } type;                                          /*    32     4 */

        /* XXX 4 bytes hole, try to pack */

        struct dsa_switch *        ds;                   /*    40     8 */
        unsigned int               index;                /*    48     4 */

        /* XXX 4 bytes hole, try to pack */

        const char  *              name;                 /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        struct dsa_port *          cpu_dp;               /*    64     8 */
        u8                         mac[6];               /*    72     6 */

        /* XXX 2 bytes hole, try to pack */

        struct device_node *       dn;                   /*    80     8 */
        unsigned int               ageing_time;          /*    88     4 */
        bool                       vlan_filtering;       /*    92     1 */
        bool                       learning;             /*    93     1 */
        u8                         stp_state;            /*    94     1 */

        /* XXX 1 byte hole, try to pack */

        struct dsa_bridge *        bridge;               /*    96     8 */
        struct devlink_port        devlink_port;         /*   104   288 */
        /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
        bool                       devlink_port_setup;   /*   392     1 */

        /* XXX 7 bytes hole, try to pack */

        struct phylink *           pl;                   /*   400     8 */
        struct phylink_config      pl_config;            /*   408    40 */
        /* --- cacheline 7 boundary (448 bytes) --- */
        struct net_device *        lag_dev;              /*   448     8 */
        bool                       lag_tx_enabled;       /*   456     1 */

        /* XXX 7 bytes hole, try to pack */

        struct net_device *        hsr_dev;              /*   464     8 */
        struct list_head           list;                 /*   472    16 */
        const struct ethtool_ops  * orig_ethtool_ops;    /*   488     8 */
        const struct dsa_netdevice_ops  * netdev_ops;    /*   496     8 */
        struct mutex               addr_lists_lock;      /*   504    32 */
        /* --- cacheline 8 boundary (512 bytes) was 24 bytes ago --- */
        struct list_head           fdbs;                 /*   536    16 */
        struct list_head           mdbs;                 /*   552    16 */
        bool                       setup;                /*   568     1 */

        /* size: 576, cachelines: 9, members: 30 */
        /* sum members: 544, holes: 6, sum holes: 25 */
        /* padding: 7 */
};

After:

pahole -C dsa_port net/dsa/slave.o
struct dsa_port {
        union {
                struct net_device * master;              /*     0     8 */
                struct net_device * slave;               /*     0     8 */
        };                                               /*     0     8 */
        const struct dsa_device_ops  * tag_ops;          /*     8     8 */
        struct dsa_switch_tree *   dst;                  /*    16     8 */
        struct sk_buff *           (*rcv)(struct sk_buff *, struct net_device *); /*    24     8 */
        enum {
                DSA_PORT_TYPE_UNUSED = 0,
                DSA_PORT_TYPE_CPU    = 1,
                DSA_PORT_TYPE_DSA    = 2,
                DSA_PORT_TYPE_USER   = 3,
        } type;                                          /*    32     4 */

        /* XXX 4 bytes hole, try to pack */

        struct dsa_switch *        ds;                   /*    40     8 */
        unsigned int               index;                /*    48     4 */

        /* XXX 4 bytes hole, try to pack */

        const char  *              name;                 /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        struct dsa_port *          cpu_dp;               /*    64     8 */
        u8                         mac[6];               /*    72     6 */
        u8                         stp_state;            /*    78     1 */

        /* XXX 1 byte hole, try to pack */

        struct device_node *       dn;                   /*    80     8 */
        unsigned int               ageing_time;          /*    88     4 */
        bool                       vlan_filtering;       /*    92     1 */
        bool                       learning;             /*    93     1 */

        /* XXX 2 bytes hole, try to pack */

        struct dsa_bridge *        bridge;               /*    96     8 */
        struct devlink_port        devlink_port;         /*   104   288 */
        /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
        bool                       devlink_port_setup;   /*   392     1 */

        /* XXX 7 bytes hole, try to pack */

        struct phylink *           pl;                   /*   400     8 */
        struct phylink_config      pl_config;            /*   408    40 */
        /* --- cacheline 7 boundary (448 bytes) --- */
        struct net_device *        lag_dev;              /*   448     8 */
        bool                       lag_tx_enabled;       /*   456     1 */

        /* XXX 7 bytes hole, try to pack */

        struct net_device *        hsr_dev;              /*   464     8 */
        struct list_head           list;                 /*   472    16 */
        const struct ethtool_ops  * orig_ethtool_ops;    /*   488     8 */
        const struct dsa_netdevice_ops  * netdev_ops;    /*   496     8 */
        struct mutex               addr_lists_lock;      /*   504    32 */
        /* --- cacheline 8 boundary (512 bytes) was 24 bytes ago --- */
        struct list_head           fdbs;                 /*   536    16 */
        struct list_head           mdbs;                 /*   552    16 */
        bool                       setup;                /*   568     1 */

        /* size: 576, cachelines: 9, members: 30 */
        /* sum members: 544, holes: 6, sum holes: 25 */
        /* padding: 7 */
};

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index f16959444ae1..8878f9ce251b 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -258,12 +258,14 @@ struct dsa_port {
 	const char		*name;
 	struct dsa_port		*cpu_dp;
 	u8			mac[ETH_ALEN];
+
+	u8			stp_state;
+
 	struct device_node	*dn;
 	unsigned int		ageing_time;
 	bool			vlan_filtering;
 	/* Managed by DSA on user ports and by drivers on CPU and DSA ports */
 	bool			learning;
-	u8			stp_state;
 	struct dsa_bridge	*bridge;
 	struct devlink_port	devlink_port;
 	bool			devlink_port_setup;
-- 
cgit v1.2.3


From bde82f389af1225df0798851eedbcd03445530d9 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 5 Jan 2022 15:21:36 +0200
Subject: net: dsa: merge all bools of struct dsa_port into a single u8

struct dsa_port has 5 bool members which create quite a number of 7 byte
holes in the structure layout. By merging them all into bitfields of an
u8, and placing that u8 in the 1-byte hole after dp->mac and dp->stp_state,
we can reduce the structure size from 576 bytes to 552 bytes on arm64.

Before:

pahole -C dsa_port net/dsa/slave.o
struct dsa_port {
        union {
                struct net_device * master;              /*     0     8 */
                struct net_device * slave;               /*     0     8 */
        };                                               /*     0     8 */
        const struct dsa_device_ops  * tag_ops;          /*     8     8 */
        struct dsa_switch_tree *   dst;                  /*    16     8 */
        struct sk_buff *           (*rcv)(struct sk_buff *, struct net_device *); /*    24     8 */
        enum {
                DSA_PORT_TYPE_UNUSED = 0,
                DSA_PORT_TYPE_CPU    = 1,
                DSA_PORT_TYPE_DSA    = 2,
                DSA_PORT_TYPE_USER   = 3,
        } type;                                          /*    32     4 */

        /* XXX 4 bytes hole, try to pack */

        struct dsa_switch *        ds;                   /*    40     8 */
        unsigned int               index;                /*    48     4 */

        /* XXX 4 bytes hole, try to pack */

        const char  *              name;                 /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        struct dsa_port *          cpu_dp;               /*    64     8 */
        u8                         mac[6];               /*    72     6 */
        u8                         stp_state;            /*    78     1 */

        /* XXX 1 byte hole, try to pack */

        struct device_node *       dn;                   /*    80     8 */
        unsigned int               ageing_time;          /*    88     4 */
        bool                       vlan_filtering;       /*    92     1 */
        bool                       learning;             /*    93     1 */

        /* XXX 2 bytes hole, try to pack */

        struct dsa_bridge *        bridge;               /*    96     8 */
        struct devlink_port        devlink_port;         /*   104   288 */
        /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
        bool                       devlink_port_setup;   /*   392     1 */

        /* XXX 7 bytes hole, try to pack */

        struct phylink *           pl;                   /*   400     8 */
        struct phylink_config      pl_config;            /*   408    40 */
        /* --- cacheline 7 boundary (448 bytes) --- */
        struct net_device *        lag_dev;              /*   448     8 */
        bool                       lag_tx_enabled;       /*   456     1 */

        /* XXX 7 bytes hole, try to pack */

        struct net_device *        hsr_dev;              /*   464     8 */
        struct list_head           list;                 /*   472    16 */
        const struct ethtool_ops  * orig_ethtool_ops;    /*   488     8 */
        const struct dsa_netdevice_ops  * netdev_ops;    /*   496     8 */
        struct mutex               addr_lists_lock;      /*   504    32 */
        /* --- cacheline 8 boundary (512 bytes) was 24 bytes ago --- */
        struct list_head           fdbs;                 /*   536    16 */
        struct list_head           mdbs;                 /*   552    16 */
        bool                       setup;                /*   568     1 */

        /* size: 576, cachelines: 9, members: 30 */
        /* sum members: 544, holes: 6, sum holes: 25 */
        /* padding: 7 */
};

After:

pahole -C dsa_port net/dsa/slave.o
struct dsa_port {
        union {
                struct net_device * master;              /*     0     8 */
                struct net_device * slave;               /*     0     8 */
        };                                               /*     0     8 */
        const struct dsa_device_ops  * tag_ops;          /*     8     8 */
        struct dsa_switch_tree *   dst;                  /*    16     8 */
        struct sk_buff *           (*rcv)(struct sk_buff *, struct net_device *); /*    24     8 */
        enum {
                DSA_PORT_TYPE_UNUSED = 0,
                DSA_PORT_TYPE_CPU    = 1,
                DSA_PORT_TYPE_DSA    = 2,
                DSA_PORT_TYPE_USER   = 3,
        } type;                                          /*    32     4 */

        /* XXX 4 bytes hole, try to pack */

        struct dsa_switch *        ds;                   /*    40     8 */
        unsigned int               index;                /*    48     4 */

        /* XXX 4 bytes hole, try to pack */

        const char  *              name;                 /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        struct dsa_port *          cpu_dp;               /*    64     8 */
        u8                         mac[6];               /*    72     6 */
        u8                         stp_state;            /*    78     1 */
        u8                         vlan_filtering:1;     /*    79: 0  1 */
        u8                         learning:1;           /*    79: 1  1 */
        u8                         lag_tx_enabled:1;     /*    79: 2  1 */
        u8                         devlink_port_setup:1; /*    79: 3  1 */
        u8                         setup:1;              /*    79: 4  1 */

        /* XXX 3 bits hole, try to pack */

        struct device_node *       dn;                   /*    80     8 */
        unsigned int               ageing_time;          /*    88     4 */

        /* XXX 4 bytes hole, try to pack */

        struct dsa_bridge *        bridge;               /*    96     8 */
        struct devlink_port        devlink_port;         /*   104   288 */
        /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
        struct phylink *           pl;                   /*   392     8 */
        struct phylink_config      pl_config;            /*   400    40 */
        struct net_device *        lag_dev;              /*   440     8 */
        /* --- cacheline 7 boundary (448 bytes) --- */
        struct net_device *        hsr_dev;              /*   448     8 */
        struct list_head           list;                 /*   456    16 */
        const struct ethtool_ops  * orig_ethtool_ops;    /*   472     8 */
        const struct dsa_netdevice_ops  * netdev_ops;    /*   480     8 */
        struct mutex               addr_lists_lock;      /*   488    32 */
        /* --- cacheline 8 boundary (512 bytes) was 8 bytes ago --- */
        struct list_head           fdbs;                 /*   520    16 */
        struct list_head           mdbs;                 /*   536    16 */

        /* size: 552, cachelines: 9, members: 30 */
        /* sum members: 539, holes: 3, sum holes: 12 */
        /* sum bitfield members: 5 bits, bit holes: 1, sum bit holes: 3 bits */
        /* last cacheline: 40 bytes */
};

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 8878f9ce251b..a8f0037b58e2 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -261,18 +261,23 @@ struct dsa_port {
 
 	u8			stp_state;
 
+	u8			vlan_filtering:1,
+				/* Managed by DSA on user ports and by
+				 * drivers on CPU and DSA ports
+				 */
+				learning:1,
+				lag_tx_enabled:1,
+				devlink_port_setup:1,
+				setup:1;
+
 	struct device_node	*dn;
 	unsigned int		ageing_time;
-	bool			vlan_filtering;
-	/* Managed by DSA on user ports and by drivers on CPU and DSA ports */
-	bool			learning;
+
 	struct dsa_bridge	*bridge;
 	struct devlink_port	devlink_port;
-	bool			devlink_port_setup;
 	struct phylink		*pl;
 	struct phylink_config	pl_config;
 	struct net_device	*lag_dev;
-	bool			lag_tx_enabled;
 	struct net_device	*hsr_dev;
 
 	struct list_head list;
@@ -293,8 +298,6 @@ struct dsa_port {
 	struct mutex		addr_lists_lock;
 	struct list_head	fdbs;
 	struct list_head	mdbs;
-
-	bool setup;
 };
 
 /* TODO: ideally DSA ports would have a single dp->link_dp member,
-- 
cgit v1.2.3


From 0625125877da9c1c09611a4acfd101571b7d1e16 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 5 Jan 2022 15:21:37 +0200
Subject: net: dsa: move dsa_port :: type near dsa_port :: index

Both dsa_port :: type and dsa_port :: index introduce a 4 octet hole
after them, so we can group them together and the holes would be
eliminated, turning 16 octets of storage into just 8. This makes the
cpu_dp pointer fit in the first cache line, which is good, because
dsa_slave_to_master(), called by dsa_enqueue_skb(), uses it.

Before:

pahole -C dsa_port net/dsa/slave.o
struct dsa_port {
        union {
                struct net_device * master;              /*     0     8 */
                struct net_device * slave;               /*     0     8 */
        };                                               /*     0     8 */
        const struct dsa_device_ops  * tag_ops;          /*     8     8 */
        struct dsa_switch_tree *   dst;                  /*    16     8 */
        struct sk_buff *           (*rcv)(struct sk_buff *, struct net_device *); /*    24     8 */
        enum {
                DSA_PORT_TYPE_UNUSED = 0,
                DSA_PORT_TYPE_CPU    = 1,
                DSA_PORT_TYPE_DSA    = 2,
                DSA_PORT_TYPE_USER   = 3,
        } type;                                          /*    32     4 */

        /* XXX 4 bytes hole, try to pack */

        struct dsa_switch *        ds;                   /*    40     8 */
        unsigned int               index;                /*    48     4 */

        /* XXX 4 bytes hole, try to pack */

        const char  *              name;                 /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        struct dsa_port *          cpu_dp;               /*    64     8 */
        u8                         mac[6];               /*    72     6 */
        u8                         stp_state;            /*    78     1 */
        u8                         vlan_filtering:1;     /*    79: 0  1 */
        u8                         learning:1;           /*    79: 1  1 */
        u8                         lag_tx_enabled:1;     /*    79: 2  1 */
        u8                         devlink_port_setup:1; /*    79: 3  1 */
        u8                         setup:1;              /*    79: 4  1 */

        /* XXX 3 bits hole, try to pack */

        struct device_node *       dn;                   /*    80     8 */
        unsigned int               ageing_time;          /*    88     4 */

        /* XXX 4 bytes hole, try to pack */

        struct dsa_bridge *        bridge;               /*    96     8 */
        struct devlink_port        devlink_port;         /*   104   288 */
        /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
        struct phylink *           pl;                   /*   392     8 */
        struct phylink_config      pl_config;            /*   400    40 */
        struct net_device *        lag_dev;              /*   440     8 */
        /* --- cacheline 7 boundary (448 bytes) --- */
        struct net_device *        hsr_dev;              /*   448     8 */
        struct list_head           list;                 /*   456    16 */
        const struct ethtool_ops  * orig_ethtool_ops;    /*   472     8 */
        const struct dsa_netdevice_ops  * netdev_ops;    /*   480     8 */
        struct mutex               addr_lists_lock;      /*   488    32 */
        /* --- cacheline 8 boundary (512 bytes) was 8 bytes ago --- */
        struct list_head           fdbs;                 /*   520    16 */
        struct list_head           mdbs;                 /*   536    16 */

        /* size: 552, cachelines: 9, members: 30 */
        /* sum members: 539, holes: 3, sum holes: 12 */
        /* sum bitfield members: 5 bits, bit holes: 1, sum bit holes: 3 bits */
        /* last cacheline: 40 bytes */
};

After:

pahole -C dsa_port net/dsa/slave.o
struct dsa_port {
        union {
                struct net_device * master;              /*     0     8 */
                struct net_device * slave;               /*     0     8 */
        };                                               /*     0     8 */
        const struct dsa_device_ops  * tag_ops;          /*     8     8 */
        struct dsa_switch_tree *   dst;                  /*    16     8 */
        struct sk_buff *           (*rcv)(struct sk_buff *, struct net_device *); /*    24     8 */
        struct dsa_switch *        ds;                   /*    32     8 */
        unsigned int               index;                /*    40     4 */
        enum {
                DSA_PORT_TYPE_UNUSED = 0,
                DSA_PORT_TYPE_CPU    = 1,
                DSA_PORT_TYPE_DSA    = 2,
                DSA_PORT_TYPE_USER   = 3,
        } type;                                          /*    44     4 */
        const char  *              name;                 /*    48     8 */
        struct dsa_port *          cpu_dp;               /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        u8                         mac[6];               /*    64     6 */
        u8                         stp_state;            /*    70     1 */
        u8                         vlan_filtering:1;     /*    71: 0  1 */
        u8                         learning:1;           /*    71: 1  1 */
        u8                         lag_tx_enabled:1;     /*    71: 2  1 */
        u8                         devlink_port_setup:1; /*    71: 3  1 */
        u8                         setup:1;              /*    71: 4  1 */

        /* XXX 3 bits hole, try to pack */

        struct device_node *       dn;                   /*    72     8 */
        unsigned int               ageing_time;          /*    80     4 */

        /* XXX 4 bytes hole, try to pack */

        struct dsa_bridge *        bridge;               /*    88     8 */
        struct devlink_port        devlink_port;         /*    96   288 */
        /* --- cacheline 6 boundary (384 bytes) --- */
        struct phylink *           pl;                   /*   384     8 */
        struct phylink_config      pl_config;            /*   392    40 */
        struct net_device *        lag_dev;              /*   432     8 */
        struct net_device *        hsr_dev;              /*   440     8 */
        /* --- cacheline 7 boundary (448 bytes) --- */
        struct list_head           list;                 /*   448    16 */
        const struct ethtool_ops  * orig_ethtool_ops;    /*   464     8 */
        const struct dsa_netdevice_ops  * netdev_ops;    /*   472     8 */
        struct mutex               addr_lists_lock;      /*   480    32 */
        /* --- cacheline 8 boundary (512 bytes) --- */
        struct list_head           fdbs;                 /*   512    16 */
        struct list_head           mdbs;                 /*   528    16 */

        /* size: 544, cachelines: 9, members: 30 */
        /* sum members: 539, holes: 1, sum holes: 4 */
        /* sum bitfield members: 5 bits, bit holes: 1, sum bit holes: 3 bits */
        /* last cacheline: 32 bytes */
};

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index a8f0037b58e2..5e42fa7ea377 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -246,6 +246,10 @@ struct dsa_port {
 	struct dsa_switch_tree *dst;
 	struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
 
+	struct dsa_switch	*ds;
+
+	unsigned int		index;
+
 	enum {
 		DSA_PORT_TYPE_UNUSED = 0,
 		DSA_PORT_TYPE_CPU,
@@ -253,8 +257,6 @@ struct dsa_port {
 		DSA_PORT_TYPE_USER,
 	} type;
 
-	struct dsa_switch	*ds;
-	unsigned int		index;
 	const char		*name;
 	struct dsa_port		*cpu_dp;
 	u8			mac[ETH_ALEN];
-- 
cgit v1.2.3


From 7787ff7763989fe61db94a82d70d6fa80848d7c9 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 5 Jan 2022 15:21:38 +0200
Subject: net: dsa: merge all bools of struct dsa_switch into a single u32

struct dsa_switch has 9 boolean properties, many of which are in fact
set by drivers for custom behavior (vlan_filtering_is_global,
needs_standalone_vlan_filtering, etc etc). The binary layout of the
structure could be improved. For example, the "bool setup" at the
beginning introduces a gratuitous 7 byte hole in the first cache line.

The change merges all boolean properties into bitfields of an u32, and
places that u32 in the first cache line of the structure, since many
bools are accessed from the data path (untag_bridge_pvid, vlan_filtering,
vlan_filtering_is_global).

We place this u32 after the existing ds->index, which is also 4 bytes in
size. As a positive side effect, ds->tagger_data now fits into the first
cache line too, because 4 bytes are saved.

Before:

pahole -C dsa_switch net/dsa/slave.o
struct dsa_switch {
        bool                       setup;                /*     0     1 */

        /* XXX 7 bytes hole, try to pack */

        struct device *            dev;                  /*     8     8 */
        struct dsa_switch_tree *   dst;                  /*    16     8 */
        unsigned int               index;                /*    24     4 */

        /* XXX 4 bytes hole, try to pack */

        struct notifier_block      nb;                   /*    32    24 */

        /* XXX last struct has 4 bytes of padding */

        void *                     priv;                 /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        void *                     tagger_data;          /*    64     8 */
        struct dsa_chip_data *     cd;                   /*    72     8 */
        const struct dsa_switch_ops  * ops;              /*    80     8 */
        u32                        phys_mii_mask;        /*    88     4 */

        /* XXX 4 bytes hole, try to pack */

        struct mii_bus *           slave_mii_bus;        /*    96     8 */
        unsigned int               ageing_time_min;      /*   104     4 */
        unsigned int               ageing_time_max;      /*   108     4 */
        struct dsa_8021q_context * tag_8021q_ctx;        /*   112     8 */
        struct devlink *           devlink;              /*   120     8 */
        /* --- cacheline 2 boundary (128 bytes) --- */
        unsigned int               num_tx_queues;        /*   128     4 */
        bool                       vlan_filtering_is_global; /*   132     1 */
        bool                       needs_standalone_vlan_filtering; /*   133     1 */
        bool                       configure_vlan_while_not_filtering; /*   134     1 */
        bool                       untag_bridge_pvid;    /*   135     1 */
        bool                       assisted_learning_on_cpu_port; /*   136     1 */
        bool                       vlan_filtering;       /*   137     1 */
        bool                       pcs_poll;             /*   138     1 */
        bool                       mtu_enforcement_ingress; /*   139     1 */
        unsigned int               num_lag_ids;          /*   140     4 */
        unsigned int               max_num_bridges;      /*   144     4 */

        /* XXX 4 bytes hole, try to pack */

        size_t                     num_ports;            /*   152     8 */

        /* size: 160, cachelines: 3, members: 27 */
        /* sum members: 141, holes: 4, sum holes: 19 */
        /* paddings: 1, sum paddings: 4 */
        /* last cacheline: 32 bytes */
};

After:

pahole -C dsa_switch net/dsa/slave.o
struct dsa_switch {
        struct device *            dev;                  /*     0     8 */
        struct dsa_switch_tree *   dst;                  /*     8     8 */
        unsigned int               index;                /*    16     4 */
        u32                        setup:1;              /*    20: 0  4 */
        u32                        vlan_filtering_is_global:1; /*    20: 1  4 */
        u32                        needs_standalone_vlan_filtering:1; /*    20: 2  4 */
        u32                        configure_vlan_while_not_filtering:1; /*    20: 3  4 */
        u32                        untag_bridge_pvid:1;  /*    20: 4  4 */
        u32                        assisted_learning_on_cpu_port:1; /*    20: 5  4 */
        u32                        vlan_filtering:1;     /*    20: 6  4 */
        u32                        pcs_poll:1;           /*    20: 7  4 */
        u32                        mtu_enforcement_ingress:1; /*    20: 8  4 */

        /* XXX 23 bits hole, try to pack */

        struct notifier_block      nb;                   /*    24    24 */

        /* XXX last struct has 4 bytes of padding */

        void *                     priv;                 /*    48     8 */
        void *                     tagger_data;          /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        struct dsa_chip_data *     cd;                   /*    64     8 */
        const struct dsa_switch_ops  * ops;              /*    72     8 */
        u32                        phys_mii_mask;        /*    80     4 */

        /* XXX 4 bytes hole, try to pack */

        struct mii_bus *           slave_mii_bus;        /*    88     8 */
        unsigned int               ageing_time_min;      /*    96     4 */
        unsigned int               ageing_time_max;      /*   100     4 */
        struct dsa_8021q_context * tag_8021q_ctx;        /*   104     8 */
        struct devlink *           devlink;              /*   112     8 */
        unsigned int               num_tx_queues;        /*   120     4 */
        unsigned int               num_lag_ids;          /*   124     4 */
        /* --- cacheline 2 boundary (128 bytes) --- */
        unsigned int               max_num_bridges;      /*   128     4 */

        /* XXX 4 bytes hole, try to pack */

        size_t                     num_ports;            /*   136     8 */

        /* size: 144, cachelines: 3, members: 27 */
        /* sum members: 132, holes: 2, sum holes: 8 */
        /* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 23 bits */
        /* paddings: 1, sum paddings: 4 */
        /* last cacheline: 16 bytes */
};

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 97 +++++++++++++++++++++++++++++--------------------------
 1 file changed, 51 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 5e42fa7ea377..a8a586039033 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -321,8 +321,6 @@ struct dsa_mac_addr {
 };
 
 struct dsa_switch {
-	bool setup;
-
 	struct device *dev;
 
 	/*
@@ -331,6 +329,57 @@ struct dsa_switch {
 	struct dsa_switch_tree	*dst;
 	unsigned int		index;
 
+	u32			setup:1,
+				/* Disallow bridge core from requesting
+				 * different VLAN awareness settings on ports
+				 * if not hardware-supported
+				 */
+				vlan_filtering_is_global:1,
+				/* Keep VLAN filtering enabled on ports not
+				 * offloading any upper
+				 */
+				needs_standalone_vlan_filtering:1,
+				/* Pass .port_vlan_add and .port_vlan_del to
+				 * drivers even for bridges that have
+				 * vlan_filtering=0. All drivers should ideally
+				 * set this (and then the option would get
+				 * removed), but it is unknown whether this
+				 * would break things or not.
+				 */
+				configure_vlan_while_not_filtering:1,
+				/* If the switch driver always programs the CPU
+				 * port as egress tagged despite the VLAN
+				 * configuration indicating otherwise, then
+				 * setting @untag_bridge_pvid will force the
+				 * DSA receive path to pop the bridge's
+				 * default_pvid VLAN tagged frames to offer a
+				 * consistent behavior between a
+				 * vlan_filtering=0 and vlan_filtering=1 bridge
+				 * device.
+				 */
+				untag_bridge_pvid:1,
+				/* Let DSA manage the FDB entries towards the
+				 * CPU, based on the software bridge database.
+				 */
+				assisted_learning_on_cpu_port:1,
+				/* In case vlan_filtering_is_global is set, the
+				 * VLAN awareness state should be retrieved
+				 * from here and not from the per-port
+				 * settings.
+				 */
+				vlan_filtering:1,
+				/* MAC PCS does not provide link state change
+				 * interrupt, and requires polling. Flag passed
+				 * on to PHYLINK.
+				 */
+				pcs_poll:1,
+				/* For switches that only have the MRU
+				 * configurable. To ensure the configured MTU
+				 * is not exceeded, normalization of MRU on all
+				 * bridged interfaces is needed.
+				 */
+				mtu_enforcement_ingress:1;
+
 	/* Listener for switch fabric events */
 	struct notifier_block	nb;
 
@@ -371,50 +420,6 @@ struct dsa_switch {
 	/* Number of switch port queues */
 	unsigned int		num_tx_queues;
 
-	/* Disallow bridge core from requesting different VLAN awareness
-	 * settings on ports if not hardware-supported
-	 */
-	bool			vlan_filtering_is_global;
-
-	/* Keep VLAN filtering enabled on ports not offloading any upper. */
-	bool			needs_standalone_vlan_filtering;
-
-	/* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges
-	 * that have vlan_filtering=0. All drivers should ideally set this (and
-	 * then the option would get removed), but it is unknown whether this
-	 * would break things or not.
-	 */
-	bool			configure_vlan_while_not_filtering;
-
-	/* If the switch driver always programs the CPU port as egress tagged
-	 * despite the VLAN configuration indicating otherwise, then setting
-	 * @untag_bridge_pvid will force the DSA receive path to pop the bridge's
-	 * default_pvid VLAN tagged frames to offer a consistent behavior
-	 * between a vlan_filtering=0 and vlan_filtering=1 bridge device.
-	 */
-	bool			untag_bridge_pvid;
-
-	/* Let DSA manage the FDB entries towards the CPU, based on the
-	 * software bridge database.
-	 */
-	bool			assisted_learning_on_cpu_port;
-
-	/* In case vlan_filtering_is_global is set, the VLAN awareness state
-	 * should be retrieved from here and not from the per-port settings.
-	 */
-	bool			vlan_filtering;
-
-	/* MAC PCS does not provide link state change interrupt, and requires
-	 * polling. Flag passed on to PHYLINK.
-	 */
-	bool			pcs_poll;
-
-	/* For switches that only have the MRU configurable. To ensure the
-	 * configured MTU is not exceeded, normalization of MRU on all bridged
-	 * interfaces is needed.
-	 */
-	bool			mtu_enforcement_ingress;
-
 	/* Drivers that benefit from having an ID associated with each
 	 * offloaded LAG should set this to the maximum number of
 	 * supported IDs. DSA will then maintain a mapping of _at
-- 
cgit v1.2.3


From 258030acc93b459ca070924921f214bd05a2d56c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 5 Jan 2022 15:21:39 +0200
Subject: net: dsa: make dsa_switch :: num_ports an unsigned int

Currently, num_ports is declared as size_t, which is defined as
__kernel_ulong_t, therefore it occupies 8 bytes of memory.

Even switches with port numbers in the range of tens are exotic, so
there is no need for this amount of storage.

Additionally, because the max_num_bridges member right above it is also
4 bytes, it means the compiler needs to add padding between the last 2
fields. By reducing the size, we don't need that padding and can reduce
the struct size.

Before:

pahole -C dsa_switch net/dsa/slave.o
struct dsa_switch {
        struct device *            dev;                  /*     0     8 */
        struct dsa_switch_tree *   dst;                  /*     8     8 */
        unsigned int               index;                /*    16     4 */
        u32                        setup:1;              /*    20: 0  4 */
        u32                        vlan_filtering_is_global:1; /*    20: 1  4 */
        u32                        needs_standalone_vlan_filtering:1; /*    20: 2  4 */
        u32                        configure_vlan_while_not_filtering:1; /*    20: 3  4 */
        u32                        untag_bridge_pvid:1;  /*    20: 4  4 */
        u32                        assisted_learning_on_cpu_port:1; /*    20: 5  4 */
        u32                        vlan_filtering:1;     /*    20: 6  4 */
        u32                        pcs_poll:1;           /*    20: 7  4 */
        u32                        mtu_enforcement_ingress:1; /*    20: 8  4 */

        /* XXX 23 bits hole, try to pack */

        struct notifier_block      nb;                   /*    24    24 */

        /* XXX last struct has 4 bytes of padding */

        void *                     priv;                 /*    48     8 */
        void *                     tagger_data;          /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        struct dsa_chip_data *     cd;                   /*    64     8 */
        const struct dsa_switch_ops  * ops;              /*    72     8 */
        u32                        phys_mii_mask;        /*    80     4 */

        /* XXX 4 bytes hole, try to pack */

        struct mii_bus *           slave_mii_bus;        /*    88     8 */
        unsigned int               ageing_time_min;      /*    96     4 */
        unsigned int               ageing_time_max;      /*   100     4 */
        struct dsa_8021q_context * tag_8021q_ctx;        /*   104     8 */
        struct devlink *           devlink;              /*   112     8 */
        unsigned int               num_tx_queues;        /*   120     4 */
        unsigned int               num_lag_ids;          /*   124     4 */
        /* --- cacheline 2 boundary (128 bytes) --- */
        unsigned int               max_num_bridges;      /*   128     4 */

        /* XXX 4 bytes hole, try to pack */

        size_t                     num_ports;            /*   136     8 */

        /* size: 144, cachelines: 3, members: 27 */
        /* sum members: 132, holes: 2, sum holes: 8 */
        /* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 23 bits */
        /* paddings: 1, sum paddings: 4 */
        /* last cacheline: 16 bytes */
};

After:

pahole -C dsa_switch net/dsa/slave.o
struct dsa_switch {
        struct device *            dev;                  /*     0     8 */
        struct dsa_switch_tree *   dst;                  /*     8     8 */
        unsigned int               index;                /*    16     4 */
        u32                        setup:1;              /*    20: 0  4 */
        u32                        vlan_filtering_is_global:1; /*    20: 1  4 */
        u32                        needs_standalone_vlan_filtering:1; /*    20: 2  4 */
        u32                        configure_vlan_while_not_filtering:1; /*    20: 3  4 */
        u32                        untag_bridge_pvid:1;  /*    20: 4  4 */
        u32                        assisted_learning_on_cpu_port:1; /*    20: 5  4 */
        u32                        vlan_filtering:1;     /*    20: 6  4 */
        u32                        pcs_poll:1;           /*    20: 7  4 */
        u32                        mtu_enforcement_ingress:1; /*    20: 8  4 */

        /* XXX 23 bits hole, try to pack */

        struct notifier_block      nb;                   /*    24    24 */

        /* XXX last struct has 4 bytes of padding */

        void *                     priv;                 /*    48     8 */
        void *                     tagger_data;          /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        struct dsa_chip_data *     cd;                   /*    64     8 */
        const struct dsa_switch_ops  * ops;              /*    72     8 */
        u32                        phys_mii_mask;        /*    80     4 */

        /* XXX 4 bytes hole, try to pack */

        struct mii_bus *           slave_mii_bus;        /*    88     8 */
        unsigned int               ageing_time_min;      /*    96     4 */
        unsigned int               ageing_time_max;      /*   100     4 */
        struct dsa_8021q_context * tag_8021q_ctx;        /*   104     8 */
        struct devlink *           devlink;              /*   112     8 */
        unsigned int               num_tx_queues;        /*   120     4 */
        unsigned int               num_lag_ids;          /*   124     4 */
        /* --- cacheline 2 boundary (128 bytes) --- */
        unsigned int               max_num_bridges;      /*   128     4 */
        unsigned int               num_ports;            /*   132     4 */

        /* size: 136, cachelines: 3, members: 27 */
        /* sum members: 128, holes: 1, sum holes: 4 */
        /* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 23 bits */
        /* paddings: 1, sum paddings: 4 */
        /* last cacheline: 8 bytes */
};

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index a8a586039033..fef9d8bb5190 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -435,7 +435,7 @@ struct dsa_switch {
 	 */
 	unsigned int		max_num_bridges;
 
-	size_t num_ports;
+	unsigned int		num_ports;
 };
 
 static inline struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p)
-- 
cgit v1.2.3


From b035c88c6a303d39d511ef71ac4e2e0176756661 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 5 Jan 2022 15:21:40 +0200
Subject: net: dsa: move dsa_switch_tree :: ports and lags to first cache line

dst->ports is accessed most notably by dsa_master_find_slave(), which is
invoked in the RX path.

dst->lags is accessed by dsa_lag_dev(), which is invoked in the RX path
of tag_dsa.c.

dst->tag_ops, dst->default_proto and dst->pd don't need to be in the
first cache line, so they are moved out by this change.

Before:

pahole -C dsa_switch_tree net/dsa/slave.o
struct dsa_switch_tree {
        struct list_head           list;                 /*     0    16 */
        struct raw_notifier_head   nh;                   /*    16     8 */
        unsigned int               index;                /*    24     4 */
        struct kref                refcount;             /*    28     4 */
        bool                       setup;                /*    32     1 */

        /* XXX 7 bytes hole, try to pack */

        const struct dsa_device_ops  * tag_ops;          /*    40     8 */
        enum dsa_tag_protocol      default_proto;        /*    48     4 */

        /* XXX 4 bytes hole, try to pack */

        struct dsa_platform_data * pd;                   /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        struct list_head           ports;                /*    64    16 */
        struct list_head           rtable;               /*    80    16 */
        struct net_device * *      lags;                 /*    96     8 */
        unsigned int               lags_len;             /*   104     4 */
        unsigned int               last_switch;          /*   108     4 */

        /* size: 112, cachelines: 2, members: 13 */
        /* sum members: 101, holes: 2, sum holes: 11 */
        /* last cacheline: 48 bytes */
};

After:

pahole -C dsa_switch_tree net/dsa/slave.o
struct dsa_switch_tree {
        struct list_head           list;                 /*     0    16 */
        struct list_head           ports;                /*    16    16 */
        struct raw_notifier_head   nh;                   /*    32     8 */
        unsigned int               index;                /*    40     4 */
        struct kref                refcount;             /*    44     4 */
        struct net_device * *      lags;                 /*    48     8 */
        bool                       setup;                /*    56     1 */

        /* XXX 7 bytes hole, try to pack */

        /* --- cacheline 1 boundary (64 bytes) --- */
        const struct dsa_device_ops  * tag_ops;          /*    64     8 */
        enum dsa_tag_protocol      default_proto;        /*    72     4 */

        /* XXX 4 bytes hole, try to pack */

        struct dsa_platform_data * pd;                   /*    80     8 */
        struct list_head           rtable;               /*    88    16 */
        unsigned int               lags_len;             /*   104     4 */
        unsigned int               last_switch;          /*   108     4 */

        /* size: 112, cachelines: 2, members: 13 */
        /* sum members: 101, holes: 2, sum holes: 11 */
        /* last cacheline: 48 bytes */
};

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index fef9d8bb5190..cbbac75138d9 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -119,6 +119,9 @@ struct dsa_netdevice_ops {
 struct dsa_switch_tree {
 	struct list_head	list;
 
+	/* List of switch ports */
+	struct list_head ports;
+
 	/* Notifier chain for switch-wide events */
 	struct raw_notifier_head	nh;
 
@@ -128,6 +131,11 @@ struct dsa_switch_tree {
 	/* Number of switches attached to this tree */
 	struct kref refcount;
 
+	/* Maps offloaded LAG netdevs to a zero-based linear ID for
+	 * drivers that need it.
+	 */
+	struct net_device **lags;
+
 	/* Has this tree been applied to the hardware? */
 	bool setup;
 
@@ -145,16 +153,10 @@ struct dsa_switch_tree {
 	 */
 	struct dsa_platform_data	*pd;
 
-	/* List of switch ports */
-	struct list_head ports;
-
 	/* List of DSA links composing the routing table */
 	struct list_head rtable;
 
-	/* Maps offloaded LAG netdevs to a zero-based linear ID for
-	 * drivers that need it.
-	 */
-	struct net_device **lags;
+	/* Length of "lags" array */
 	unsigned int lags_len;
 
 	/* Track the largest switch index within a tree */
-- 
cgit v1.2.3


From 4b026e82893b2e084fcc48bc26f91612b62ecc4f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 5 Jan 2022 15:21:41 +0200
Subject: net: dsa: combine two holes in struct dsa_switch_tree

There is a 7 byte hole after dst->setup and a 4 byte hole after
dst->default_proto. Combining them, we have a single hole of just 3
bytes on 64 bit machines.

Before:

pahole -C dsa_switch_tree net/dsa/slave.o
struct dsa_switch_tree {
        struct list_head           list;                 /*     0    16 */
        struct list_head           ports;                /*    16    16 */
        struct raw_notifier_head   nh;                   /*    32     8 */
        unsigned int               index;                /*    40     4 */
        struct kref                refcount;             /*    44     4 */
        struct net_device * *      lags;                 /*    48     8 */
        bool                       setup;                /*    56     1 */

        /* XXX 7 bytes hole, try to pack */

        /* --- cacheline 1 boundary (64 bytes) --- */
        const struct dsa_device_ops  * tag_ops;          /*    64     8 */
        enum dsa_tag_protocol      default_proto;        /*    72     4 */

        /* XXX 4 bytes hole, try to pack */

        struct dsa_platform_data * pd;                   /*    80     8 */
        struct list_head           rtable;               /*    88    16 */
        unsigned int               lags_len;             /*   104     4 */
        unsigned int               last_switch;          /*   108     4 */

        /* size: 112, cachelines: 2, members: 13 */
        /* sum members: 101, holes: 2, sum holes: 11 */
        /* last cacheline: 48 bytes */
};

After:

pahole -C dsa_switch_tree net/dsa/slave.o
struct dsa_switch_tree {
        struct list_head           list;                 /*     0    16 */
        struct list_head           ports;                /*    16    16 */
        struct raw_notifier_head   nh;                   /*    32     8 */
        unsigned int               index;                /*    40     4 */
        struct kref                refcount;             /*    44     4 */
        struct net_device * *      lags;                 /*    48     8 */
        const struct dsa_device_ops  * tag_ops;          /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        enum dsa_tag_protocol      default_proto;        /*    64     4 */
        bool                       setup;                /*    68     1 */

        /* XXX 3 bytes hole, try to pack */

        struct dsa_platform_data * pd;                   /*    72     8 */
        struct list_head           rtable;               /*    80    16 */
        unsigned int               lags_len;             /*    96     4 */
        unsigned int               last_switch;          /*   100     4 */

        /* size: 104, cachelines: 2, members: 13 */
        /* sum members: 101, holes: 1, sum holes: 3 */
        /* last cacheline: 40 bytes */
};

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index cbbac75138d9..5d0fec6db3ae 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -136,9 +136,6 @@ struct dsa_switch_tree {
 	 */
 	struct net_device **lags;
 
-	/* Has this tree been applied to the hardware? */
-	bool setup;
-
 	/* Tagging protocol operations */
 	const struct dsa_device_ops *tag_ops;
 
@@ -147,6 +144,9 @@ struct dsa_switch_tree {
 	 */
 	enum dsa_tag_protocol default_proto;
 
+	/* Has this tree been applied to the hardware? */
+	bool setup;
+
 	/*
 	 * Configuration data for the platform device that owns
 	 * this dsa switch tree instance.
-- 
cgit v1.2.3


From 01ec4a2e8f01f027a0f06cad237c935da8d643bf Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 3 Jul 2021 00:23:39 +0200
Subject: headers/deps: USB: Optimize <linux/usb/ch9.h> dependencies, remove
 <linux/device.h>

The <linux/usb/ch9.h> header is used over 1,400 times in a typical distro
build, but few of its users actually need the full <linux/device.h> header.

          --------------------------------------------------------------------
          | Combined, preprocessed C code size of header, without line markers,
          | with comments stripped:
          -------------------------
  before: | #include <linux/usb/ch9.h>              | LOC:  7,078 | headers:  172
   after: | #include <linux/usb/ch9.h>              | LOC:    812 | headers:   38

Remove it and add it to the places that need it.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/ch9.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h
index 1cffa34740b0..969e7dba6358 100644
--- a/include/linux/usb/ch9.h
+++ b/include/linux/usb/ch9.h
@@ -33,7 +33,6 @@
 #ifndef __LINUX_USB_CH9_H
 #define __LINUX_USB_CH9_H
 
-#include <linux/device.h>
 #include <uapi/linux/usb/ch9.h>
 
 /* USB 3.2 SuperSpeed Plus phy signaling rate generation and lane count */
@@ -45,6 +44,8 @@ enum usb_ssp_rate {
 	USB_SSP_GEN_2x2,
 };
 
+struct device;
+
 extern const char *usb_ep_type_string(int ep_type);
 extern const char *usb_speed_string(enum usb_device_speed speed);
 extern enum usb_device_speed usb_get_maximum_speed(struct device *dev);
-- 
cgit v1.2.3


From d82e2b27ad3a4fdd745332e0c310ae05660a1bf1 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Wed, 5 Jan 2022 10:04:56 +0200
Subject: RDMA/mad: Delete duplicated init_query_mad functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Several drivers used same function to initialize query MAD,
so move that function to global header file.

Link: https://lore.kernel.org/r/af6f35c590ff5ef56d0137351b8b295af0f7c13c.1641369858.git.leonro@nvidia.com
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/rdma/ib_smi.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h
index fdb8633cbaff..fc16b826b2c1 100644
--- a/include/rdma/ib_smi.h
+++ b/include/rdma/ib_smi.h
@@ -144,5 +144,15 @@ ib_get_smp_direction(struct ib_smp *smp)
 #define IB_NOTICE_TRAP_DR_NOTICE	0x80
 #define IB_NOTICE_TRAP_DR_TRUNC		0x40
 
-
+/**
+ * ib_init_query_mad - Initialize query MAD.
+ * @mad: MAD to initialize.
+ */
+static inline void ib_init_query_mad(struct ib_smp *mad)
+{
+	mad->base_version = IB_MGMT_BASE_VERSION;
+	mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+	mad->class_version = 1;
+	mad->method = IB_MGMT_METHOD_GET;
+}
 #endif /* IB_SMI_H */
-- 
cgit v1.2.3


From edce22e19bfa86efa2522d041d6367f2f099e8ed Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Wed, 5 Jan 2022 09:05:15 -0800
Subject: block: move rq_list macros to blk-mq.h

Move the request list macros to the header file that defines that struct
they operate on.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220105170518.3181469-2-kbusch@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 29 +++++++++++++++++++++++++++++
 include/linux/blkdev.h | 29 -----------------------------
 2 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 550996cf419c..bf64b94cd64e 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -216,6 +216,35 @@ static inline unsigned short req_get_ioprio(struct request *req)
 #define rq_dma_dir(rq) \
 	(op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE)
 
+#define rq_list_add(listptr, rq)	do {		\
+	(rq)->rq_next = *(listptr);			\
+	*(listptr) = rq;				\
+} while (0)
+
+#define rq_list_pop(listptr)				\
+({							\
+	struct request *__req = NULL;			\
+	if ((listptr) && *(listptr))	{		\
+		__req = *(listptr);			\
+		*(listptr) = __req->rq_next;		\
+	}						\
+	__req;						\
+})
+
+#define rq_list_peek(listptr)				\
+({							\
+	struct request *__req = NULL;			\
+	if ((listptr) && *(listptr))			\
+		__req = *(listptr);			\
+	__req;						\
+})
+
+#define rq_list_for_each(listptr, pos)			\
+	for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos))
+
+#define rq_list_next(rq)	(rq)->rq_next
+#define rq_list_empty(list)	((list) == (struct request *) NULL)
+
 enum blk_eh_timer_return {
 	BLK_EH_DONE,		/* drivers has completed the command */
 	BLK_EH_RESET_TIMER,	/* reset timer and try again */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 22746b2d6825..9c95df26fc26 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1339,33 +1339,4 @@ struct io_comp_batch {
 
 #define DEFINE_IO_COMP_BATCH(name)	struct io_comp_batch name = { }
 
-#define rq_list_add(listptr, rq)	do {		\
-	(rq)->rq_next = *(listptr);			\
-	*(listptr) = rq;				\
-} while (0)
-
-#define rq_list_pop(listptr)				\
-({							\
-	struct request *__req = NULL;			\
-	if ((listptr) && *(listptr))	{		\
-		__req = *(listptr);			\
-		*(listptr) = __req->rq_next;		\
-	}						\
-	__req;						\
-})
-
-#define rq_list_peek(listptr)				\
-({							\
-	struct request *__req = NULL;			\
-	if ((listptr) && *(listptr))			\
-		__req = *(listptr);			\
-	__req;						\
-})
-
-#define rq_list_for_each(listptr, pos)			\
-	for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos))
-
-#define rq_list_next(rq)	(rq)->rq_next
-#define rq_list_empty(list)	((list) == (struct request *) NULL)
-
 #endif /* _LINUX_BLKDEV_H */
-- 
cgit v1.2.3


From 3764fd05e1f89530e2ee5cbff0b638f2b1141b90 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Wed, 5 Jan 2022 09:05:16 -0800
Subject: block: introduce rq_list_for_each_safe macro

While iterating a list, a particular request may need to be removed for
special handling. Provide an iterator that can safely handle that.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Link: https://lore.kernel.org/r/20220105170518.3181469-3-kbusch@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index bf64b94cd64e..1467f0fa2142 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -242,6 +242,10 @@ static inline unsigned short req_get_ioprio(struct request *req)
 #define rq_list_for_each(listptr, pos)			\
 	for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos))
 
+#define rq_list_for_each_safe(listptr, pos, nxt)			\
+	for (pos = rq_list_peek((listptr)), nxt = rq_list_next(pos);	\
+		pos; pos = nxt, nxt = pos ? rq_list_next(pos) : NULL)
+
 #define rq_list_next(rq)	(rq)->rq_next
 #define rq_list_empty(list)	((list) == (struct request *) NULL)
 
-- 
cgit v1.2.3


From d2528be7a8b09af9796a270debd14101a72bb552 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Wed, 5 Jan 2022 09:05:17 -0800
Subject: block: introduce rq_list_move

When iterating a list, a particular request may need to be moved for
special handling. Provide a helper function to achieve that so drivers
don't need to reimplement rqlist manipulation.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220105170518.3181469-4-kbusch@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 1467f0fa2142..f40a05ecca4a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -249,6 +249,23 @@ static inline unsigned short req_get_ioprio(struct request *req)
 #define rq_list_next(rq)	(rq)->rq_next
 #define rq_list_empty(list)	((list) == (struct request *) NULL)
 
+/**
+ * rq_list_move() - move a struct request from one list to another
+ * @src: The source list @rq is currently in
+ * @dst: The destination list that @rq will be appended to
+ * @rq: The request to move
+ * @prev: The request preceding @rq in @src (NULL if @rq is the head)
+ */
+static void inline rq_list_move(struct request **src, struct request **dst,
+				struct request *rq, struct request *prev)
+{
+	if (prev)
+		prev->rq_next = rq->rq_next;
+	else
+		*src = rq->rq_next;
+	rq_list_add(dst, rq);
+}
+
 enum blk_eh_timer_return {
 	BLK_EH_DONE,		/* drivers has completed the command */
 	BLK_EH_RESET_TIMER,	/* reset timer and try again */
-- 
cgit v1.2.3


From fbdb0ba7051e9b0881708c1c7bb491363cb7e486 Mon Sep 17 00:00:00 2001
From: Maher Sanalla <msanalla@nvidia.com>
Date: Wed, 15 Dec 2021 09:54:31 +0200
Subject: IB/mlx5: Expose NDR speed through MAD

Under MAD query port, Report NDR speed when NDR is supported in the port
capability mask.

Link: https://lore.kernel.org/r/a2ab630d2a634547db9b581faa9d65da2edb9d05.1639554831.git.leonro@nvidia.com
Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/rdma/ib_mad.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 465b0d0bdaf8..2e3843b761e8 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -276,6 +276,7 @@ enum ib_port_capability_mask2_bits {
 	IB_PORT_SWITCH_PORT_STATE_TABLE_SUP	= 1 << 3,
 	IB_PORT_LINK_WIDTH_2X_SUP		= 1 << 4,
 	IB_PORT_LINK_SPEED_HDR_SUP		= 1 << 5,
+	IB_PORT_LINK_SPEED_NDR_SUP		= 1 << 10,
 };
 
 #define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26)
-- 
cgit v1.2.3


From ffa81a03267b450cb8c7bc0d327c05c99de579a4 Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp>
Date: Mon, 25 Oct 2021 12:10:36 +0900
Subject: dt-bindings: clock: Add DT bindings for SMU of Toshiba Visconti
 TMPV770x SoC

Add device tree bindings for SMU (System Management Unit) controller of
Toshiba Visconti TMPV770x SoC series.

Signed-off-by: Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20211025031038.4180686-3-nobuhiro1.iwamatsu@toshiba.co.jp
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/dt-bindings/clock/toshiba,tmpv770x.h | 181 +++++++++++++++++++++++++++
 include/dt-bindings/reset/toshiba,tmpv770x.h |  41 ++++++
 2 files changed, 222 insertions(+)
 create mode 100644 include/dt-bindings/clock/toshiba,tmpv770x.h
 create mode 100644 include/dt-bindings/reset/toshiba,tmpv770x.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/toshiba,tmpv770x.h b/include/dt-bindings/clock/toshiba,tmpv770x.h
new file mode 100644
index 000000000000..5fce713001fd
--- /dev/null
+++ b/include/dt-bindings/clock/toshiba,tmpv770x.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_CLOCK_TOSHIBA_TMPV770X_H_
+#define _DT_BINDINGS_CLOCK_TOSHIBA_TMPV770X_H_
+
+/* PLL */
+#define TMPV770X_PLL_PIPLL0		0
+#define TMPV770X_PLL_PIPLL1		1
+#define TMPV770X_PLL_PIDNNPLL		2
+#define TMPV770X_PLL_PIETHERPLL		3
+#define TMPV770X_PLL_PIDDRCPLL		4
+#define TMPV770X_PLL_PIVOIFPLL		5
+#define TMPV770X_PLL_PIIMGERPLL		6
+#define TMPV770X_NR_PLL		7
+
+/* Clocks */
+#define TMPV770X_CLK_PIPLL1_DIV1	0
+#define TMPV770X_CLK_PIPLL1_DIV2	1
+#define TMPV770X_CLK_PIPLL1_DIV4	2
+#define TMPV770X_CLK_PIDNNPLL_DIV1	3
+#define TMPV770X_CLK_DDRC_PHY_PLL0	4
+#define TMPV770X_CLK_DDRC_PHY_PLL1	5
+#define TMPV770X_CLK_D_PHYPLL		6
+#define TMPV770X_CLK_PHY_PCIEPLL	7
+#define TMPV770X_CLK_CA53CL0		8
+#define TMPV770X_CLK_CA53CL1		9
+#define TMPV770X_CLK_PISDMAC		10
+#define TMPV770X_CLK_PIPDMAC0		11
+#define TMPV770X_CLK_PIPDMAC1		12
+#define TMPV770X_CLK_PIWRAM		13
+#define TMPV770X_CLK_DDRC0		14
+#define TMPV770X_CLK_DDRC0_SCLK		15
+#define TMPV770X_CLK_DDRC0_NCLK		16
+#define TMPV770X_CLK_DDRC0_MCLK		17
+#define TMPV770X_CLK_DDRC0_APBCLK	18
+#define TMPV770X_CLK_DDRC1		19
+#define TMPV770X_CLK_DDRC1_SCLK		20
+#define TMPV770X_CLK_DDRC1_NCLK		21
+#define TMPV770X_CLK_DDRC1_MCLK		22
+#define TMPV770X_CLK_DDRC1_APBCLK	23
+#define TMPV770X_CLK_HOX		24
+#define TMPV770X_CLK_PCIE_MSTR		25
+#define TMPV770X_CLK_PCIE_AUX		26
+#define TMPV770X_CLK_PIINTC		27
+#define TMPV770X_CLK_PIETHER_BUS	28
+#define TMPV770X_CLK_PISPI0		29
+#define TMPV770X_CLK_PISPI1		30
+#define TMPV770X_CLK_PISPI2		31
+#define TMPV770X_CLK_PISPI3		32
+#define TMPV770X_CLK_PISPI4		33
+#define TMPV770X_CLK_PISPI5		34
+#define TMPV770X_CLK_PISPI6		35
+#define TMPV770X_CLK_PIUART0		36
+#define TMPV770X_CLK_PIUART1		37
+#define TMPV770X_CLK_PIUART2		38
+#define TMPV770X_CLK_PIUART3		39
+#define TMPV770X_CLK_PII2C0		40
+#define TMPV770X_CLK_PII2C1		41
+#define TMPV770X_CLK_PII2C2		42
+#define TMPV770X_CLK_PII2C3		43
+#define TMPV770X_CLK_PII2C4		44
+#define TMPV770X_CLK_PII2C5		45
+#define TMPV770X_CLK_PII2C6		46
+#define TMPV770X_CLK_PII2C7		47
+#define TMPV770X_CLK_PII2C8		48
+#define TMPV770X_CLK_PIGPIO		49
+#define TMPV770X_CLK_PIPGM		50
+#define TMPV770X_CLK_PIPCMIF		51
+#define TMPV770X_CLK_PIPCMIF_AUDIO_O	52
+#define TMPV770X_CLK_PIPCMIF_AUDIO_I	53
+#define TMPV770X_CLK_PICMPT0		54
+#define TMPV770X_CLK_PICMPT1		55
+#define TMPV770X_CLK_PITSC		56
+#define TMPV770X_CLK_PIUWDT		57
+#define TMPV770X_CLK_PISWDT		58
+#define TMPV770X_CLK_WDTCLK		59
+#define TMPV770X_CLK_PISUBUS_150M	60
+#define TMPV770X_CLK_PISUBUS_300M	61
+#define TMPV770X_CLK_PIPMU		62
+#define TMPV770X_CLK_PIGPMU		63
+#define TMPV770X_CLK_PITMU		64
+#define TMPV770X_CLK_WRCK		65
+#define TMPV770X_CLK_PIEMM		66
+#define TMPV770X_CLK_PIMISC		67
+#define TMPV770X_CLK_PIGCOMM		68
+#define TMPV770X_CLK_PIDCOMM		69
+#define TMPV770X_CLK_PICKMON		70
+#define TMPV770X_CLK_PIMBUS		71
+#define TMPV770X_CLK_SBUSCLK		72
+#define TMPV770X_CLK_DDR0_APBCLKCLK	73
+#define TMPV770X_CLK_DDR1_APBCLKCLK	74
+#define TMPV770X_CLK_DSP0_PBCLK		75
+#define TMPV770X_CLK_DSP1_PBCLK		76
+#define TMPV770X_CLK_DSP2_PBCLK		77
+#define TMPV770X_CLK_DSP3_PBCLK		78
+#define TMPV770X_CLK_DSVIIF0_APBCLK	79
+#define TMPV770X_CLK_VIIF0_APBCLK	80
+#define TMPV770X_CLK_VIIF0_CFGCLK	81
+#define TMPV770X_CLK_VIIF1_APBCLK	82
+#define TMPV770X_CLK_VIIF1_CFGCLK	83
+#define TMPV770X_CLK_VIIF2_APBCLK	84
+#define TMPV770X_CLK_VIIF2_CFGCLK	85
+#define TMPV770X_CLK_VIIF3_APBCLK	86
+#define TMPV770X_CLK_VIIF3_CFGCLK	87
+#define TMPV770X_CLK_VIIF4_APBCLK	88
+#define TMPV770X_CLK_VIIF4_CFGCLK	89
+#define TMPV770X_CLK_VIIF5_APBCLK	90
+#define TMPV770X_CLK_VIIF5_CFGCLK	91
+#define TMPV770X_CLK_VOIF_SBUSCLK	92
+#define TMPV770X_CLK_VOIF_PROCCLK	93
+#define TMPV770X_CLK_VOIF_DPHYCFGCLK	94
+#define TMPV770X_CLK_DNN0		95
+#define TMPV770X_CLK_STMAT		96
+#define TMPV770X_CLK_HWA0		97
+#define TMPV770X_CLK_AFFINE0		98
+#define TMPV770X_CLK_HAMAT		99
+#define TMPV770X_CLK_SMLDB		100
+#define TMPV770X_CLK_HWA0_ASYNC		101
+#define TMPV770X_CLK_HWA2		102
+#define TMPV770X_CLK_FLMAT		103
+#define TMPV770X_CLK_PYRAMID		104
+#define TMPV770X_CLK_HWA2_ASYNC		105
+#define TMPV770X_CLK_DSP0		106
+#define TMPV770X_CLK_VIIFBS0		107
+#define TMPV770X_CLK_VIIFBS0_L2ISP	108
+#define TMPV770X_CLK_VIIFBS0_L1ISP	109
+#define TMPV770X_CLK_VIIFBS0_PROC	110
+#define TMPV770X_CLK_VIIFBS1		111
+#define TMPV770X_CLK_VIIFBS2		112
+#define TMPV770X_CLK_VIIFOP_MBUS	113
+#define TMPV770X_CLK_VIIFOP0_PROC	114
+#define TMPV770X_CLK_PIETHER_2P5M	115
+#define TMPV770X_CLK_PIETHER_25M	116
+#define TMPV770X_CLK_PIETHER_50M	117
+#define TMPV770X_CLK_PIETHER_125M	118
+#define TMPV770X_CLK_VOIF0_DPHYCFG	119
+#define TMPV770X_CLK_VOIF0_PROC		120
+#define TMPV770X_CLK_VOIF0_SBUS		121
+#define TMPV770X_CLK_VOIF0_DSIREF	122
+#define TMPV770X_CLK_VOIF0_PIXEL	123
+#define TMPV770X_CLK_PIREFCLK		124
+#define TMPV770X_CLK_SBUS		125
+#define TMPV770X_CLK_BUSLCK		126
+#define TMPV770X_NR_CLK			127
+
+/* Reset */
+#define TMPV770X_RESET_PIETHER_2P5M	0
+#define TMPV770X_RESET_PIETHER_25M	1
+#define TMPV770X_RESET_PIETHER_50M	2
+#define TMPV770X_RESET_PIETHER_125M	3
+#define TMPV770X_RESET_HOX		4
+#define TMPV770X_RESET_PCIE_MSTR	5
+#define TMPV770X_RESET_PCIE_AUX		6
+#define TMPV770X_RESET_PIINTC		7
+#define TMPV770X_RESET_PIETHER_BUS	8
+#define TMPV770X_RESET_PISPI0		9
+#define TMPV770X_RESET_PISPI1		10
+#define TMPV770X_RESET_PISPI2		11
+#define TMPV770X_RESET_PISPI3		12
+#define TMPV770X_RESET_PISPI4		13
+#define TMPV770X_RESET_PISPI5		14
+#define TMPV770X_RESET_PISPI6		15
+#define TMPV770X_RESET_PIUART0		16
+#define TMPV770X_RESET_PIUART1		17
+#define TMPV770X_RESET_PIUART2		18
+#define TMPV770X_RESET_PIUART3		19
+#define TMPV770X_RESET_PII2C0		20
+#define TMPV770X_RESET_PII2C1		21
+#define TMPV770X_RESET_PII2C2		22
+#define TMPV770X_RESET_PII2C3		23
+#define TMPV770X_RESET_PII2C4		24
+#define TMPV770X_RESET_PII2C5		25
+#define TMPV770X_RESET_PII2C6		26
+#define TMPV770X_RESET_PII2C7		27
+#define TMPV770X_RESET_PII2C8		28
+#define TMPV770X_RESET_PIPCMIF		29
+#define TMPV770X_RESET_PICKMON		30
+#define TMPV770X_RESET_SBUSCLK		31
+#define TMPV770X_NR_RESET		32
+
+#endif /*_DT_BINDINGS_CLOCK_TOSHIBA_TMPV770X_H_ */
diff --git a/include/dt-bindings/reset/toshiba,tmpv770x.h b/include/dt-bindings/reset/toshiba,tmpv770x.h
new file mode 100644
index 000000000000..c1007acb1941
--- /dev/null
+++ b/include/dt-bindings/reset/toshiba,tmpv770x.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_RESET_TOSHIBA_TMPV770X_H_
+#define _DT_BINDINGS_RESET_TOSHIBA_TMPV770X_H_
+
+/* Reset */
+#define TMPV770X_RESET_PIETHER_2P5M	0
+#define TMPV770X_RESET_PIETHER_25M	1
+#define TMPV770X_RESET_PIETHER_50M	2
+#define TMPV770X_RESET_PIETHER_125M	3
+#define TMPV770X_RESET_HOX		4
+#define TMPV770X_RESET_PCIE_MSTR	5
+#define TMPV770X_RESET_PCIE_AUX		6
+#define TMPV770X_RESET_PIINTC		7
+#define TMPV770X_RESET_PIETHER_BUS	8
+#define TMPV770X_RESET_PISPI0		9
+#define TMPV770X_RESET_PISPI1		10
+#define TMPV770X_RESET_PISPI2		11
+#define TMPV770X_RESET_PISPI3		12
+#define TMPV770X_RESET_PISPI4		13
+#define TMPV770X_RESET_PISPI5		14
+#define TMPV770X_RESET_PISPI6		15
+#define TMPV770X_RESET_PIUART0		16
+#define TMPV770X_RESET_PIUART1		17
+#define TMPV770X_RESET_PIUART2		18
+#define TMPV770X_RESET_PIUART3		19
+#define TMPV770X_RESET_PII2C0		20
+#define TMPV770X_RESET_PII2C1		21
+#define TMPV770X_RESET_PII2C2		22
+#define TMPV770X_RESET_PII2C3		23
+#define TMPV770X_RESET_PII2C4		24
+#define TMPV770X_RESET_PII2C5		25
+#define TMPV770X_RESET_PII2C6		26
+#define TMPV770X_RESET_PII2C7		27
+#define TMPV770X_RESET_PII2C8		28
+#define TMPV770X_RESET_PIPCMIF		29
+#define TMPV770X_RESET_PICKMON		30
+#define TMPV770X_RESET_SBUSCLK		31
+#define TMPV770X_NR_RESET		32
+
+#endif /*_DT_BINDINGS_RESET_TOSHIBA_TMPV770X_H_ */
-- 
cgit v1.2.3


From 4a48ef70b93b8c7ed5190adfca18849e76387b80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Mon, 3 Jan 2022 16:08:06 +0100
Subject: xdp: Allow registering memory model without rxq reference
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The functions that register an XDP memory model take a struct xdp_rxq as
parameter, but the RXQ is not actually used for anything other than pulling
out the struct xdp_mem_info that it embeds. So refactor the register
functions and export variants that just take a pointer to the xdp_mem_info.

This is in preparation for enabling XDP_REDIRECT in bpf_prog_run(), using a
page_pool instance that is not connected to any network device.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220103150812.87914-2-toke@redhat.com
---
 include/net/xdp.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 447f9b1578f3..8f0812e4996d 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -260,6 +260,9 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
 int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 			       enum xdp_mem_type type, void *allocator);
 void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq);
+int xdp_reg_mem_model(struct xdp_mem_info *mem,
+		      enum xdp_mem_type type, void *allocator);
+void xdp_unreg_mem_model(struct xdp_mem_info *mem);
 
 /* Drivers not supporting XDP metadata can use this helper, which
  * rejects any room expansion for metadata as a result.
-- 
cgit v1.2.3


From 35b2e549894b7ef0b6e7f3a70c2ab75b767cfce9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Mon, 3 Jan 2022 16:08:07 +0100
Subject: page_pool: Add callback to init pages when they are allocated
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a new callback function to page_pool that, if set, will be called every
time a new page is allocated. This will be used from bpf_test_run() to
initialise the page data with the data provided by userspace when running
XDP programs with redirect turned on.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/20220103150812.87914-3-toke@redhat.com
---
 include/net/page_pool.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index a4082406a003..d807b6800a4a 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -80,6 +80,8 @@ struct page_pool_params {
 	enum dma_data_direction dma_dir; /* DMA mapping direction */
 	unsigned int	max_len; /* max DMA sync memory size */
 	unsigned int	offset;  /* DMA addr offset */
+	void (*init_callback)(struct page *page, void *arg);
+	void *init_arg;
 };
 
 struct page_pool {
-- 
cgit v1.2.3


From 64693ec7774e471f817a725686d93903e919a2e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Mon, 3 Jan 2022 16:08:08 +0100
Subject: page_pool: Store the XDP mem id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Store the XDP mem ID inside the page_pool struct so it can be retrieved
later for use in bpf_prog_run().

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/20220103150812.87914-4-toke@redhat.com
---
 include/net/page_pool.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index d807b6800a4a..79a805542d0f 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -96,6 +96,7 @@ struct page_pool {
 	unsigned int frag_offset;
 	struct page *frag_page;
 	long frag_users;
+	u32 xdp_mem_id;
 
 	/*
 	 * Data structure for allocation side
@@ -170,9 +171,12 @@ bool page_pool_return_skb_page(struct page *page);
 
 struct page_pool *page_pool_create(const struct page_pool_params *params);
 
+struct xdp_mem_info;
+
 #ifdef CONFIG_PAGE_POOL
 void page_pool_destroy(struct page_pool *pool);
-void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *));
+void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
+			   struct xdp_mem_info *mem);
 void page_pool_release_page(struct page_pool *pool, struct page *page);
 void page_pool_put_page_bulk(struct page_pool *pool, void **data,
 			     int count);
@@ -182,7 +186,8 @@ static inline void page_pool_destroy(struct page_pool *pool)
 }
 
 static inline void page_pool_use_xdp_mem(struct page_pool *pool,
-					 void (*disconnect)(void *))
+					 void (*disconnect)(void *),
+					 struct xdp_mem_info *mem)
 {
 }
 static inline void page_pool_release_page(struct page_pool *pool,
-- 
cgit v1.2.3


From d53ad5d8b218a885e95080d4d3d556b16b91b1b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Mon, 3 Jan 2022 16:08:09 +0100
Subject: xdp: Move conversion to xdp_frame out of map functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All map redirect functions except XSK maps convert xdp_buff to xdp_frame
before enqueueing it. So move this conversion of out the map functions
and into xdp_do_redirect(). This removes a bit of duplicated code, but more
importantly it makes it possible to support caller-allocated xdp_frame
structures, which will be added in a subsequent commit.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220103150812.87914-5-toke@redhat.com
---
 include/linux/bpf.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 26753139d5b4..6e947cd91152 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1669,17 +1669,17 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
 struct btf *bpf_get_btf_vmlinux(void);
 
 /* Map specifics */
-struct xdp_buff;
+struct xdp_frame;
 struct sk_buff;
 struct bpf_dtab_netdev;
 struct bpf_cpu_map_entry;
 
 void __dev_flush(void);
-int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx);
-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx);
-int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
 			  struct bpf_map *map, bool exclude_ingress);
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 			     struct bpf_prog *xdp_prog);
@@ -1688,7 +1688,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
 			   bool exclude_ingress);
 
 void __cpu_map_flush(void);
-int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
+int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx);
 int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
 			     struct sk_buff *skb);
@@ -1866,26 +1866,26 @@ static inline void __dev_flush(void)
 {
 }
 
-struct xdp_buff;
+struct xdp_frame;
 struct bpf_dtab_netdev;
 struct bpf_cpu_map_entry;
 
 static inline
-int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx)
 {
 	return 0;
 }
 
 static inline
-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx)
 {
 	return 0;
 }
 
 static inline
-int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
 			  struct bpf_map *map, bool exclude_ingress)
 {
 	return 0;
@@ -1913,7 +1913,7 @@ static inline void __cpu_map_flush(void)
 }
 
 static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
-				  struct xdp_buff *xdp,
+				  struct xdp_frame *xdpf,
 				  struct net_device *dev_rx)
 {
 	return 0;
-- 
cgit v1.2.3


From 1372d34ccf6dd480332b2bcb2fd59a2b9a0df415 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Mon, 3 Jan 2022 16:08:10 +0100
Subject: xdp: Add xdp_do_redirect_frame() for pre-computed xdp_frames
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add an xdp_do_redirect_frame() variant which supports pre-computed
xdp_frame structures. This will be used in bpf_prog_run() to avoid having
to write to the xdp_frame structure when the XDP program doesn't modify the
frame boundaries.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220103150812.87914-6-toke@redhat.com
---
 include/linux/filter.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 60eec80fa1d4..71fa57b88bfc 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1019,6 +1019,10 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 int xdp_do_redirect(struct net_device *dev,
 		    struct xdp_buff *xdp,
 		    struct bpf_prog *prog);
+int xdp_do_redirect_frame(struct net_device *dev,
+			  struct xdp_buff *xdp,
+			  struct xdp_frame *xdpf,
+			  struct bpf_prog *prog);
 void xdp_do_flush(void);
 
 /* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as
-- 
cgit v1.2.3


From 335e4dd67b480c8fa571ea7e71af0d22047fcfb7 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Tue, 4 Jan 2022 09:46:06 +0100
Subject: xen/x86: obtain upper 32 bits of video frame buffer address for Dom0

The hypervisor has been supplying this information for a couple of major
releases. Make use of it. The need to set a flag in the capabilities
field also points out that the prior setting of that field from the
hypervisor interface's gbl_caps one was wrong, so that code gets deleted
(there's also no equivalent of this in native boot code).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>

Link: https://lore.kernel.org/r/a3df8bf3-d044-b7bb-3383-cd5239d6d4af@suse.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 include/xen/interface/xen.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 5e9916939268..0ca23eca2a9c 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -722,6 +722,9 @@ struct dom0_vga_console_info {
 			uint32_t gbl_caps;
 			/* Mode attributes (offset 0x0, VESA command 0x4f01). */
 			uint16_t mode_attrs;
+			uint16_t pad;
+			/* high 32 bits of lfb_base */
+			uint32_t ext_lfb_base;
 		} vesa_lfb;
 	} u;
 };
-- 
cgit v1.2.3


From 9dd060afe2dfd4e3f67b6732fdc681e52cd7cbd9 Mon Sep 17 00:00:00 2001
From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Date: Thu, 9 Dec 2021 22:05:33 +0200
Subject: xen/balloon: Bring alloc(free)_xenballooned_pages helpers back

This patch rolls back some of the changes introduced by commit
121f2faca2c0a "xen/balloon: rename alloc/free_xenballooned_pages"
in order to make possible to still allocate xenballooned pages
if CONFIG_XEN_UNPOPULATED_ALLOC is enabled.

On Arm the unpopulated pages will be allocated on top of extended
regions provided by Xen via device-tree (the subsequent patches
will add required bits to support unpopulated-alloc feature on Arm).
The problem is that extended regions feature has been introduced
into Xen quite recently (during 4.16 release cycle). So this
effectively means that Linux must only use unpopulated-alloc on Arm
if it is running on "new Xen" which advertises these regions.
But, it will only be known after parsing the "hypervisor" node
at boot time, so before doing that we cannot assume anything.

In order to keep working if CONFIG_XEN_UNPOPULATED_ALLOC is enabled
and the extended regions are not advertised (Linux is running on
"old Xen", etc) we need the fallback to alloc_xenballooned_pages().

This way we wouldn't reduce the amount of memory usable (wasting
RAM pages) for any of the external mappings anymore (and eliminate
XSA-300) with "new Xen", but would be still functional ballooning
out RAM pages with "old Xen".

Also rename alloc(free)_xenballooned_pages to xen_alloc(free)_ballooned_pages
and make xen_alloc(free)_unpopulated_pages static inline in xen.h
if CONFIG_XEN_UNPOPULATED_ALLOC is disabled.

Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Link: https://lore.kernel.org/r/1639080336-26573-4-git-send-email-olekstysh@gmail.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 include/xen/balloon.h |  3 +++
 include/xen/xen.h     | 14 ++++++++++++++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index e93d4f0088c5..f78a6cc94f1a 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -26,6 +26,9 @@ extern struct balloon_stats balloon_stats;
 
 void balloon_set_new_target(unsigned long target);
 
+int xen_alloc_ballooned_pages(unsigned int nr_pages, struct page **pages);
+void xen_free_ballooned_pages(unsigned int nr_pages, struct page **pages);
+
 #ifdef CONFIG_XEN_BALLOON
 void xen_balloon_init(void);
 #else
diff --git a/include/xen/xen.h b/include/xen/xen.h
index 9f031b5faa54..86c5b37684d9 100644
--- a/include/xen/xen.h
+++ b/include/xen/xen.h
@@ -52,7 +52,21 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
 extern u64 xen_saved_max_mem_size;
 #endif
 
+#ifdef CONFIG_XEN_UNPOPULATED_ALLOC
 int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages);
 void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages);
+#else
+#include <xen/balloon.h>
+static inline int xen_alloc_unpopulated_pages(unsigned int nr_pages,
+		struct page **pages)
+{
+	return xen_alloc_ballooned_pages(nr_pages, pages);
+}
+static inline void xen_free_unpopulated_pages(unsigned int nr_pages,
+		struct page **pages)
+{
+	xen_free_ballooned_pages(nr_pages, pages);
+}
+#endif
 
 #endif	/* _XEN_XEN_H */
-- 
cgit v1.2.3


From d1a928eac72962b562162c25baf45ce147e27247 Mon Sep 17 00:00:00 2001
From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Date: Thu, 9 Dec 2021 22:05:34 +0200
Subject: xen/unpopulated-alloc: Add mechanism to use Xen resource

The main reason of this change is that unpopulated-alloc
code cannot be used in its current form on Arm, but there
is a desire to reuse it to avoid wasting real RAM pages
for the grant/foreign mappings.

The problem is that system "iomem_resource" is used for
the address space allocation, but the really unallocated
space can't be figured out precisely by the domain on Arm
without hypervisor involvement. For example, not all device
I/O regions are known by the time domain starts creating
grant/foreign mappings. And following the advise from
"iomem_resource" we might end up reusing these regions by
a mistake. So, the hypervisor which maintains the P2M for
the domain is in the best position to provide unused regions
of guest physical address space which could be safely used
to create grant/foreign mappings.

Introduce new helper arch_xen_unpopulated_init() which purpose
is to create specific Xen resource based on the memory regions
provided by the hypervisor to be used as unused space for Xen
scratch pages. If arch doesn't define arch_xen_unpopulated_init()
the default "iomem_resource" will be used.

Update the arguments list of allocate_resource() in fill_list()
to always allocate a region from the hotpluggable range
(maximum possible addressable physical memory range for which
the linear mapping could be created). If arch doesn't define
arch_get_mappable_range() the default range (0,-1) will be used.

The behaviour on x86 won't be changed by current patch as both
arch_xen_unpopulated_init() and arch_get_mappable_range()
are not implemented for it.

Also fallback to allocate xenballooned pages (balloon out RAM
pages) if we do not have any suitable resource to work with
(target_resource is invalid) and as the result we won't be able
to provide unpopulated pages on a request.

Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Link: https://lore.kernel.org/r/1639080336-26573-5-git-send-email-olekstysh@gmail.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 include/xen/xen.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/xen/xen.h b/include/xen/xen.h
index 86c5b37684d9..a99bab817523 100644
--- a/include/xen/xen.h
+++ b/include/xen/xen.h
@@ -55,6 +55,8 @@ extern u64 xen_saved_max_mem_size;
 #ifdef CONFIG_XEN_UNPOPULATED_ALLOC
 int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages);
 void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages);
+#include <linux/ioport.h>
+int arch_xen_unpopulated_init(struct resource **res);
 #else
 #include <xen/balloon.h>
 static inline int xen_alloc_unpopulated_pages(unsigned int nr_pages,
-- 
cgit v1.2.3


From a457fd5660efa9cf960d2461156a1025bfdb13fa Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@wdc.com>
Date: Fri, 26 Nov 2021 17:05:51 +0530
Subject: RISC-V: KVM: Add VM capability to allow userspace get GPA bits

The number of GPA bits supported for a RISC-V Guest/VM is based on the
MMU mode used by the G-stage translation. The KVM RISC-V will detect and
use the best possible MMU mode for the G-stage in kvm_arch_init().

We add a generic VM capability KVM_CAP_VM_GPA_BITS which can be used by
the KVM userspace to get the number of GPA (guest physical address) bits
supported for a Guest/VM.

Signed-off-by: Anup Patel <anup.patel@wdc.com>
Reviewed-and-tested-by: Atish Patra <atishp@rivosinc.com>
---
 include/uapi/linux/kvm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 1daa45268de2..469f05d69c8d 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1131,6 +1131,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
 #define KVM_CAP_ARM_MTE 205
 #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
+#define KVM_CAP_VM_GPA_BITS 207
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From ae16d059f8c9409eba0c412639def0494765b761 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Tue, 26 Oct 2021 18:22:44 +0200
Subject: mm/slub: Make object_err() static

There are no callers outside of mm/slub.c anymore.

Move freelist_corrupted() that calls object_err() to avoid a need for
forward declaration.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Roman Gushchin <guro@fb.com>
---
 include/linux/slub_def.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 0fa751b946fa..1ef68d4de9c0 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -156,9 +156,6 @@ static inline void sysfs_slab_release(struct kmem_cache *s)
 }
 #endif
 
-void object_err(struct kmem_cache *s, struct page *page,
-		u8 *object, char *reason);
-
 void *fixup_red_left(struct kmem_cache *s, void *p);
 
 static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
-- 
cgit v1.2.3


From d122019bf061cccc4583eb9ad40bf58c2fe517be Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 4 Oct 2021 14:45:51 +0100
Subject: mm: Split slab into its own type

Make struct slab independent of struct page. It still uses the
underlying memory in struct page for storing slab-specific data, but
slab and slub can now be weaned off using struct page directly.  Some of
the wrapper functions (slab_address() and slab_order()) still need to
cast to struct folio, but this is a significant disentanglement.

[ vbabka@suse.cz: Rebase on folios, use folio instead of page where
  possible.

  Do not duplicate flags field in struct slab, instead make the related
  accessors go through slab_folio(). For testing pfmemalloc use the
  folio_*_active flag accessors directly so the PageSlabPfmemalloc
  wrappers can be removed later.

  Make folio_slab() expect only folio_test_slab() == true folios and
  virt_to_slab() return NULL when folio_test_slab() == false.

  Move struct slab to mm/slab.h.

  Don't represent with struct slab pages that are not true slab pages,
  but just a compound page obtained directly rom page allocator (with
  large kmalloc() for SLUB and SLOB). ]

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Roman Gushchin <guro@fb.com>
---
 include/linux/mm_types.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c3a6e6209600..1ae3537c7920 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -56,11 +56,11 @@ struct mem_cgroup;
  * in each subpage, but you may need to restore some of their values
  * afterwards.
  *
- * SLUB uses cmpxchg_double() to atomically update its freelist and
- * counters.  That requires that freelist & counters be adjacent and
- * double-word aligned.  We align all struct pages to double-word
- * boundaries, and ensure that 'freelist' is aligned within the
- * struct.
+ * SLUB uses cmpxchg_double() to atomically update its freelist and counters.
+ * That requires that freelist & counters in struct slab be adjacent and
+ * double-word aligned. Because struct slab currently just reinterprets the
+ * bits of struct page, we align all struct pages to double-word boundaries,
+ * and ensure that 'freelist' is aligned within struct slab.
  */
 #ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
 #define _struct_page_alignment	__aligned(2 * sizeof(unsigned long))
-- 
cgit v1.2.3


From 0b3eb091d5759479d44cb793fad2c51ea06bdcec Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 4 Oct 2021 14:45:56 +0100
Subject: mm: Convert check_heap_object() to use struct slab

Ensure that we're not seeing a tail page inside __check_heap_object() by
converting to a slab instead of a page.  Take the opportunity to mark
the slab as const since we're not modifying it.  Also move the
declaration of __check_heap_object() to mm/slab.h so it's not available
to the wider kernel.

[ vbabka@suse.cz: in check_heap_object() only convert to struct slab for
  actual PageSlab pages; use folio as intermediate step instead of page ]

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Roman Gushchin <guro@fb.com>
---
 include/linux/slab.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 181045148b06..367366f1d1ff 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -189,14 +189,6 @@ bool kmem_valid_obj(void *object);
 void kmem_dump_obj(void *object);
 #endif
 
-#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
-void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
-			bool to_user);
-#else
-static inline void __check_heap_object(const void *ptr, unsigned long n,
-				       struct page *page, bool to_user) { }
-#endif
-
 /*
  * Some archs want to perform DMA into kmalloc caches and need a guaranteed
  * alignment larger than the alignment of a 64-bit integer.
-- 
cgit v1.2.3


From bb192ed9aa7191a5d65548f82c42b6750d65f569 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Wed, 3 Nov 2021 15:39:59 +0100
Subject: mm/slub: Convert most struct page to struct slab by spatch

The majority of conversion from struct page to struct slab in SLUB
internals can be delegated to a coccinelle semantic patch. This includes
renaming of variables with 'page' in name to 'slab', and similar.

Big thanks to Julia Lawall and Luis Chamberlain for help with
coccinelle.

// Options: --include-headers --no-includes --smpl-spacing include/linux/slub_def.h mm/slub.c
// Note: needs coccinelle 1.1.1 to avoid breaking whitespace, and ocaml for the
// embedded script

// build list of functions to exclude from applying the next rule
@initialize:ocaml@
@@

let ok_function p =
  not (List.mem (List.hd p).current_element ["nearest_obj";"obj_to_index";"objs_per_slab_page";"__slab_lock";"__slab_unlock";"free_nonslab_page";"kmalloc_large_node"])

// convert the type from struct page to struct page in all functions except the
// list from previous rule
// this also affects struct kmem_cache_cpu, but that's ok
@@
position p : script:ocaml() { ok_function p };
@@

- struct page@p
+ struct slab

// in struct kmem_cache_cpu, change the name from page to slab
// the type was already converted by the previous rule
@@
@@

struct kmem_cache_cpu {
...
-struct slab *page;
+struct slab *slab;
...
}

// there are many places that use c->page which is now c->slab after the
// previous rule
@@
struct kmem_cache_cpu *c;
@@

-c->page
+c->slab

@@
@@

struct kmem_cache {
...
- unsigned int cpu_partial_pages;
+ unsigned int cpu_partial_slabs;
...
}

@@
struct kmem_cache *s;
@@

- s->cpu_partial_pages
+ s->cpu_partial_slabs

@@
@@

static void
- setup_page_debug(
+ setup_slab_debug(
 ...)
 {...}

@@
@@

- setup_page_debug(
+ setup_slab_debug(
 ...);

// for all functions (with exceptions), change any "struct slab *page"
// parameter to "struct slab *slab" in the signature, and generally all
// occurences of "page" to "slab" in the body - with some special cases.

@@
identifier fn !~ "free_nonslab_page|obj_to_index|objs_per_slab_page|nearest_obj";
@@
 fn(...,
-   struct slab *page
+   struct slab *slab
    ,...)
 {
<...
- page
+ slab
...>
 }

// similar to previous but the param is called partial_page
@@
identifier fn;
@@

 fn(...,
-   struct slab *partial_page
+   struct slab *partial_slab
    ,...)
 {
<...
- partial_page
+ partial_slab
...>
 }

// similar to previous but for functions that take pointer to struct page ptr
@@
identifier fn;
@@

 fn(...,
-   struct slab **ret_page
+   struct slab **ret_slab
    ,...)
 {
<...
- ret_page
+ ret_slab
...>
 }

// functions converted by previous rules that were temporarily called using
// slab_page(E) so we want to remove the wrapper now that they accept struct
// slab ptr directly
@@
identifier fn =~ "slab_free|do_slab_free";
expression E;
@@

 fn(...,
- slab_page(E)
+ E
  ,...)

// similar to previous but for another pattern
@@
identifier fn =~ "slab_pad_check|check_object";
@@

 fn(...,
- folio_page(folio, 0)
+ slab
  ,...)

// functions that were returning struct page ptr and now will return struct
// slab ptr, including slab_page() wrapper removal
@@
identifier fn =~ "allocate_slab|new_slab";
expression E;
@@

 static
-struct slab *
+struct slab *
 fn(...)
 {
<...
- slab_page(E)
+ E
...>
 }

// rename any former struct page * declarations
@@
@@

struct slab *
(
- page
+ slab
|
- partial_page
+ partial_slab
|
- oldpage
+ oldslab
)
;

// this has to be separate from previous rule as page and page2 appear at the
// same line
@@
@@

struct slab *
-page2
+slab2
;

// similar but with initial assignment
@@
expression E;
@@

struct slab *
(
- page
+ slab
|
- flush_page
+ flush_slab
|
- discard_page
+ slab_to_discard
|
- page_to_unfreeze
+ slab_to_unfreeze
)
= E;

// convert most of struct page to struct slab usage inside functions (with
// exceptions), including specific variable renames
@@
identifier fn !~ "nearest_obj|obj_to_index|objs_per_slab_page|__slab_(un)*lock|__free_slab|free_nonslab_page|kmalloc_large_node";
expression E;
@@

 fn(...)
 {
<...
(
- int pages;
+ int slabs;
|
- int pages = E;
+ int slabs = E;
|
- page
+ slab
|
- flush_page
+ flush_slab
|
- partial_page
+ partial_slab
|
- oldpage->pages
+ oldslab->slabs
|
- oldpage
+ oldslab
|
- unsigned int nr_pages;
+ unsigned int nr_slabs;
|
- nr_pages
+ nr_slabs
|
- unsigned int partial_pages = E;
+ unsigned int partial_slabs = E;
|
- partial_pages
+ partial_slabs
)
...>
 }

// this has to be split out from the previous rule so that lines containing
// multiple matching changes will be fully converted
@@
identifier fn !~ "nearest_obj|obj_to_index|objs_per_slab_page|__slab_(un)*lock|__free_slab|free_nonslab_page|kmalloc_large_node";
@@

 fn(...)
 {
<...
(
- slab->pages
+ slab->slabs
|
- pages
+ slabs
|
- page2
+ slab2
|
- discard_page
+ slab_to_discard
|
- page_to_unfreeze
+ slab_to_unfreeze
)
...>
 }

// after we simply changed all occurences of page to slab, some usages need
// adjustment for slab-specific functions, or use slab_page() wrapper
@@
identifier fn !~ "nearest_obj|obj_to_index|objs_per_slab_page|__slab_(un)*lock|__free_slab|free_nonslab_page|kmalloc_large_node";
@@

 fn(...)
 {
<...
(
- page_slab(slab)
+ slab
|
- kasan_poison_slab(slab)
+ kasan_poison_slab(slab_page(slab))
|
- page_address(slab)
+ slab_address(slab)
|
- page_size(slab)
+ slab_size(slab)
|
- PageSlab(slab)
+ folio_test_slab(slab_folio(slab))
|
- page_to_nid(slab)
+ slab_nid(slab)
|
- compound_order(slab)
+ slab_order(slab)
)
...>
 }

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Luis Chamberlain <mcgrof@kernel.org>
---
 include/linux/slub_def.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 1ef68d4de9c0..00d99afe1c0e 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -48,9 +48,9 @@ enum stat_item {
 struct kmem_cache_cpu {
 	void **freelist;	/* Pointer to next available object */
 	unsigned long tid;	/* Globally unique transaction id */
-	struct page *page;	/* The slab from which we are allocating */
+	struct slab *slab;	/* The slab from which we are allocating */
 #ifdef CONFIG_SLUB_CPU_PARTIAL
-	struct page *partial;	/* Partially allocated frozen slabs */
+	struct slab *partial;	/* Partially allocated frozen slabs */
 #endif
 	local_lock_t lock;	/* Protects the fields above */
 #ifdef CONFIG_SLUB_STATS
@@ -100,7 +100,7 @@ struct kmem_cache {
 	/* Number of per cpu partial objects to keep around */
 	unsigned int cpu_partial;
 	/* Number of per cpu partial pages to keep around */
-	unsigned int cpu_partial_pages;
+	unsigned int cpu_partial_slabs;
 #endif
 	struct kmem_cache_order_objects oo;
 
-- 
cgit v1.2.3


From c2092c12064a9728b2928979f88575cc1c2247fa Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Mon, 15 Nov 2021 16:55:15 +0100
Subject: mm/slub: Finish struct page to struct slab conversion

Update comments mentioning pages to mention slabs where appropriate.
Also some goto labels.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Roman Gushchin <guro@fb.com>
---
 include/linux/slub_def.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 00d99afe1c0e..8a9c2876ca89 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -99,7 +99,7 @@ struct kmem_cache {
 #ifdef CONFIG_SLUB_CPU_PARTIAL
 	/* Number of per cpu partial objects to keep around */
 	unsigned int cpu_partial;
-	/* Number of per cpu partial pages to keep around */
+	/* Number of per cpu partial slabs to keep around */
 	unsigned int cpu_partial_slabs;
 #endif
 	struct kmem_cache_order_objects oo;
-- 
cgit v1.2.3


From 40f3bf0cb04c91d33531b1b95788ad2f0e4062cf Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Tue, 2 Nov 2021 15:42:04 +0100
Subject: mm: Convert struct page to struct slab in functions used by other
 subsystems

KASAN, KFENCE and memcg interact with SLAB or SLUB internals through
functions nearest_obj(), obj_to_index() and objs_per_slab() that use
struct page as parameter. This patch converts it to struct slab
including all callers, through a coccinelle semantic patch.

// Options: --include-headers --no-includes --smpl-spacing include/linux/slab_def.h include/linux/slub_def.h mm/slab.h mm/kasan/*.c mm/kfence/kfence_test.c mm/memcontrol.c mm/slab.c mm/slub.c
// Note: needs coccinelle 1.1.1 to avoid breaking whitespace

@@
@@

-objs_per_slab_page(
+objs_per_slab(
 ...
 )
 { ... }

@@
@@

-objs_per_slab_page(
+objs_per_slab(
 ...
 )

@@
identifier fn =~ "obj_to_index|objs_per_slab";
@@

 fn(...,
-   const struct page *page
+   const struct slab *slab
    ,...)
 {
<...
(
- page_address(page)
+ slab_address(slab)
|
- page
+ slab
)
...>
 }

@@
identifier fn =~ "nearest_obj";
@@

 fn(...,
-   struct page *page
+   const struct slab *slab
    ,...)
 {
<...
(
- page_address(page)
+ slab_address(slab)
|
- page
+ slab
)
...>
 }

@@
identifier fn =~ "nearest_obj|obj_to_index|objs_per_slab";
expression E;
@@

 fn(...,
(
- slab_page(E)
+ E
|
- virt_to_page(E)
+ virt_to_slab(E)
|
- virt_to_head_page(E)
+ virt_to_slab(E)
|
- page
+ page_slab(page)
)
  ,...)

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Reviewed-by: Roman Gushchin <guro@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Marco Elver <elver@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: <kasan-dev@googlegroups.com>
Cc: <cgroups@vger.kernel.org>
---
 include/linux/slab_def.h | 16 ++++++++--------
 include/linux/slub_def.h | 18 +++++++++---------
 2 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 3aa5e1e73ab6..e24c9aff6fed 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -87,11 +87,11 @@ struct kmem_cache {
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
-static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
+static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
 				void *x)
 {
-	void *object = x - (x - page->s_mem) % cache->size;
-	void *last_object = page->s_mem + (cache->num - 1) * cache->size;
+	void *object = x - (x - slab->s_mem) % cache->size;
+	void *last_object = slab->s_mem + (cache->num - 1) * cache->size;
 
 	if (unlikely(object > last_object))
 		return last_object;
@@ -106,16 +106,16 @@ static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
  *   reciprocal_divide(offset, cache->reciprocal_buffer_size)
  */
 static inline unsigned int obj_to_index(const struct kmem_cache *cache,
-					const struct page *page, void *obj)
+					const struct slab *slab, void *obj)
 {
-	u32 offset = (obj - page->s_mem);
+	u32 offset = (obj - slab->s_mem);
 	return reciprocal_divide(offset, cache->reciprocal_buffer_size);
 }
 
-static inline int objs_per_slab_page(const struct kmem_cache *cache,
-				     const struct page *page)
+static inline int objs_per_slab(const struct kmem_cache *cache,
+				     const struct slab *slab)
 {
-	if (is_kfence_address(page_address(page)))
+	if (is_kfence_address(slab_address(slab)))
 		return 1;
 	return cache->num;
 }
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 8a9c2876ca89..33c5c0e3bd8d 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -158,11 +158,11 @@ static inline void sysfs_slab_release(struct kmem_cache *s)
 
 void *fixup_red_left(struct kmem_cache *s, void *p);
 
-static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
+static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
 				void *x) {
-	void *object = x - (x - page_address(page)) % cache->size;
-	void *last_object = page_address(page) +
-		(page->objects - 1) * cache->size;
+	void *object = x - (x - slab_address(slab)) % cache->size;
+	void *last_object = slab_address(slab) +
+		(slab->objects - 1) * cache->size;
 	void *result = (unlikely(object > last_object)) ? last_object : object;
 
 	result = fixup_red_left(cache, result);
@@ -178,16 +178,16 @@ static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
 }
 
 static inline unsigned int obj_to_index(const struct kmem_cache *cache,
-					const struct page *page, void *obj)
+					const struct slab *slab, void *obj)
 {
 	if (is_kfence_address(obj))
 		return 0;
-	return __obj_to_index(cache, page_address(page), obj);
+	return __obj_to_index(cache, slab_address(slab), obj);
 }
 
-static inline int objs_per_slab_page(const struct kmem_cache *cache,
-				     const struct page *page)
+static inline int objs_per_slab(const struct kmem_cache *cache,
+				     const struct slab *slab)
 {
-	return page->objects;
+	return slab->objects;
 }
 #endif /* _LINUX_SLUB_DEF_H */
-- 
cgit v1.2.3


From 4b5f8d9a895ada8e0abb58ccd35d9fe229e3a595 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Tue, 2 Nov 2021 22:42:04 +0100
Subject: mm/memcg: Convert slab objcgs from struct page to struct slab

page->memcg_data is used with MEMCG_DATA_OBJCGS flag only for slab pages
so convert all the related infrastructure to struct slab. Also use
struct folio instead of struct page when resolving object pointers.

This is not just mechanistic changing of types and names. Now in
mem_cgroup_from_obj() we use folio_test_slab() to decide if we interpret
the folio as a real slab instead of a large kmalloc, instead of relying
on MEMCG_DATA_OBJCGS bit that used to be checked in page_objcgs_check().
Similarly in memcg_slab_free_hook() where we can encounter
kmalloc_large() pages (here the folio slab flag check is implied by
virt_to_slab()). As a result, page_objcgs_check() can be dropped instead
of converted.

To avoid include cycles, move the inline definition of slab_objcgs()
from memcontrol.h to mm/slab.h.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Roman Gushchin <guro@fb.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: <cgroups@vger.kernel.org>
---
 include/linux/memcontrol.h | 48 ----------------------------------------------
 1 file changed, 48 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0c5c403f4be6..e34112f6a369 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -536,45 +536,6 @@ static inline bool folio_memcg_kmem(struct folio *folio)
 	return folio->memcg_data & MEMCG_DATA_KMEM;
 }
 
-/*
- * page_objcgs - get the object cgroups vector associated with a page
- * @page: a pointer to the page struct
- *
- * Returns a pointer to the object cgroups vector associated with the page,
- * or NULL. This function assumes that the page is known to have an
- * associated object cgroups vector. It's not safe to call this function
- * against pages, which might have an associated memory cgroup: e.g.
- * kernel stack pages.
- */
-static inline struct obj_cgroup **page_objcgs(struct page *page)
-{
-	unsigned long memcg_data = READ_ONCE(page->memcg_data);
-
-	VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page);
-	VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
-
-	return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
-}
-
-/*
- * page_objcgs_check - get the object cgroups vector associated with a page
- * @page: a pointer to the page struct
- *
- * Returns a pointer to the object cgroups vector associated with the page,
- * or NULL. This function is safe to use if the page can be directly associated
- * with a memory cgroup.
- */
-static inline struct obj_cgroup **page_objcgs_check(struct page *page)
-{
-	unsigned long memcg_data = READ_ONCE(page->memcg_data);
-
-	if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS))
-		return NULL;
-
-	VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
-
-	return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
-}
 
 #else
 static inline bool folio_memcg_kmem(struct folio *folio)
@@ -582,15 +543,6 @@ static inline bool folio_memcg_kmem(struct folio *folio)
 	return false;
 }
 
-static inline struct obj_cgroup **page_objcgs(struct page *page)
-{
-	return NULL;
-}
-
-static inline struct obj_cgroup **page_objcgs_check(struct page *page)
-{
-	return NULL;
-}
 #endif
 
 static inline bool PageMemcgKmem(struct page *page)
-- 
cgit v1.2.3


From 6e48a966dfd18987fec9385566a67d36e2b5fc11 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 4 Oct 2021 14:46:46 +0100
Subject: mm/kasan: Convert to struct folio and struct slab

KASAN accesses some slab related struct page fields so we need to
convert it to struct slab. Some places are a bit simplified thanks to
kasan_addr_to_slab() encapsulating the PageSlab flag check through
virt_to_slab().  When resolving object address to either a real slab or
a large kmalloc, use struct folio as the intermediate type for testing
the slab flag to avoid unnecessary implicit compound_head().

[ vbabka@suse.cz: use struct folio, adjust to differences in previous
  patches ]

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Reviewed-by: Roman Gushchin <guro@fb.com>
Tested-by: Hyeongogn Yoo <42.hyeyoo@gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: <kasan-dev@googlegroups.com>
---
 include/linux/kasan.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index d8783b682669..fb78108d694e 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -9,6 +9,7 @@
 
 struct kmem_cache;
 struct page;
+struct slab;
 struct vm_struct;
 struct task_struct;
 
@@ -193,11 +194,11 @@ static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache)
 	return 0;
 }
 
-void __kasan_poison_slab(struct page *page);
-static __always_inline void kasan_poison_slab(struct page *page)
+void __kasan_poison_slab(struct slab *slab);
+static __always_inline void kasan_poison_slab(struct slab *slab)
 {
 	if (kasan_enabled())
-		__kasan_poison_slab(page);
+		__kasan_poison_slab(slab);
 }
 
 void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
@@ -322,7 +323,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache,
 				      slab_flags_t *flags) {}
 static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {}
 static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
-static inline void kasan_poison_slab(struct page *page) {}
+static inline void kasan_poison_slab(struct slab *slab) {}
 static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
 					void *object) {}
 static inline void kasan_poison_object_data(struct kmem_cache *cache,
-- 
cgit v1.2.3


From c5e97ed154589524a1df4ae2be55c4cfdb0d0573 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 4 Oct 2021 14:46:48 +0100
Subject: bootmem: Use page->index instead of page->freelist

page->freelist is for the use of slab.  Using page->index is the same
set of bits as page->freelist, and by using an integer instead of a
pointer, we can avoid casts.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: <x86@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 include/linux/bootmem_info.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h
index 2bc8b1f69c93..cc35d010fa94 100644
--- a/include/linux/bootmem_info.h
+++ b/include/linux/bootmem_info.h
@@ -30,7 +30,7 @@ void put_page_bootmem(struct page *page);
  */
 static inline void free_bootmem_page(struct page *page)
 {
-	unsigned long magic = (unsigned long)page->freelist;
+	unsigned long magic = page->index;
 
 	/*
 	 * The reserve_bootmem_region sets the reserved flag on bootmem
-- 
cgit v1.2.3


From 63cfc65753d604edc6cfe07e6fba2bf8ececb293 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 6 Jan 2022 00:11:49 +0200
Subject: net: dsa: don't enumerate dsa_switch and dsa_port bit fields using
 commas

This is a cosmetic incremental fixup to commits
7787ff776398 ("net: dsa: merge all bools of struct dsa_switch into a single u32")
bde82f389af1 ("net: dsa: merge all bools of struct dsa_port into a single u8")

The desire to make this change was enunciated after posting these
patches here:
https://patchwork.kernel.org/project/netdevbpf/cover/20220105132141.2648876-1-vladimir.oltean@nxp.com/

but due to a slight timing overlap (message posted at 2:28 p.m. UTC,
merge commit is at 2:46 p.m. UTC), that comment was missed and the
changes were applied as-is.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 114 +++++++++++++++++++++++++++---------------------------
 1 file changed, 56 insertions(+), 58 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 5d0fec6db3ae..63c7f553f938 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -265,14 +265,16 @@ struct dsa_port {
 
 	u8			stp_state;
 
-	u8			vlan_filtering:1,
-				/* Managed by DSA on user ports and by
-				 * drivers on CPU and DSA ports
-				 */
-				learning:1,
-				lag_tx_enabled:1,
-				devlink_port_setup:1,
-				setup:1;
+	u8			vlan_filtering:1;
+
+	/* Managed by DSA on user ports and by drivers on CPU and DSA ports */
+	u8			learning:1;
+
+	u8			lag_tx_enabled:1;
+
+	u8			devlink_port_setup:1;
+
+	u8			setup:1;
 
 	struct device_node	*dn;
 	unsigned int		ageing_time;
@@ -331,56 +333,52 @@ struct dsa_switch {
 	struct dsa_switch_tree	*dst;
 	unsigned int		index;
 
-	u32			setup:1,
-				/* Disallow bridge core from requesting
-				 * different VLAN awareness settings on ports
-				 * if not hardware-supported
-				 */
-				vlan_filtering_is_global:1,
-				/* Keep VLAN filtering enabled on ports not
-				 * offloading any upper
-				 */
-				needs_standalone_vlan_filtering:1,
-				/* Pass .port_vlan_add and .port_vlan_del to
-				 * drivers even for bridges that have
-				 * vlan_filtering=0. All drivers should ideally
-				 * set this (and then the option would get
-				 * removed), but it is unknown whether this
-				 * would break things or not.
-				 */
-				configure_vlan_while_not_filtering:1,
-				/* If the switch driver always programs the CPU
-				 * port as egress tagged despite the VLAN
-				 * configuration indicating otherwise, then
-				 * setting @untag_bridge_pvid will force the
-				 * DSA receive path to pop the bridge's
-				 * default_pvid VLAN tagged frames to offer a
-				 * consistent behavior between a
-				 * vlan_filtering=0 and vlan_filtering=1 bridge
-				 * device.
-				 */
-				untag_bridge_pvid:1,
-				/* Let DSA manage the FDB entries towards the
-				 * CPU, based on the software bridge database.
-				 */
-				assisted_learning_on_cpu_port:1,
-				/* In case vlan_filtering_is_global is set, the
-				 * VLAN awareness state should be retrieved
-				 * from here and not from the per-port
-				 * settings.
-				 */
-				vlan_filtering:1,
-				/* MAC PCS does not provide link state change
-				 * interrupt, and requires polling. Flag passed
-				 * on to PHYLINK.
-				 */
-				pcs_poll:1,
-				/* For switches that only have the MRU
-				 * configurable. To ensure the configured MTU
-				 * is not exceeded, normalization of MRU on all
-				 * bridged interfaces is needed.
-				 */
-				mtu_enforcement_ingress:1;
+	u32			setup:1;
+
+	/* Disallow bridge core from requesting different VLAN awareness
+	 * settings on ports if not hardware-supported
+	 */
+	u32			vlan_filtering_is_global:1;
+
+	/* Keep VLAN filtering enabled on ports not offloading any upper */
+	u32			needs_standalone_vlan_filtering:1;
+
+	/* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges
+	 * that have vlan_filtering=0. All drivers should ideally set this (and
+	 * then the option would get removed), but it is unknown whether this
+	 * would break things or not.
+	 */
+	u32			configure_vlan_while_not_filtering:1;
+
+	/* If the switch driver always programs the CPU port as egress tagged
+	 * despite the VLAN configuration indicating otherwise, then setting
+	 * @untag_bridge_pvid will force the DSA receive path to pop the
+	 * bridge's default_pvid VLAN tagged frames to offer a consistent
+	 * behavior between a vlan_filtering=0 and vlan_filtering=1 bridge
+	 * device.
+	 */
+	u32			untag_bridge_pvid:1;
+
+	/* Let DSA manage the FDB entries towards the
+	 * CPU, based on the software bridge database.
+	 */
+	u32			assisted_learning_on_cpu_port:1;
+
+	/* In case vlan_filtering_is_global is set, the VLAN awareness state
+	 * should be retrieved from here and not from the per-port settings.
+	 */
+	u32			vlan_filtering:1;
+
+	/* MAC PCS does not provide link state change interrupt, and requires
+	 * polling. Flag passed on to PHYLINK.
+	 */
+	u32			pcs_poll:1;
+
+	/* For switches that only have the MRU configurable. To ensure the
+	 * configured MTU is not exceeded, normalization of MRU on all bridged
+	 * interfaces is needed.
+	 */
+	u32			mtu_enforcement_ingress:1;
 
 	/* Listener for switch fabric events */
 	struct notifier_block	nb;
-- 
cgit v1.2.3


From 1b26d364e4e9bd6540a8e7bcaf50e7f35041feb5 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 6 Jan 2022 00:11:50 +0200
Subject: net: dsa: warn about dsa_port and dsa_switch bit fields being non
 atomic

As discussed during review here:
https://patchwork.kernel.org/project/netdevbpf/patch/20220105132141.2648876-3-vladimir.oltean@nxp.com/

we should inform developers about pitfalls of concurrent access to the
boolean properties of dsa_switch and dsa_port, now that they've been
converted to bit fields. No other measure than a comment needs to be
taken, since the code paths that update these bit fields are not
concurrent with each other.

Suggested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 63c7f553f938..57b3e4e7413b 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -265,6 +265,10 @@ struct dsa_port {
 
 	u8			stp_state;
 
+	/* Warning: the following bit fields are not atomic, and updating them
+	 * can only be done from code paths where concurrency is not possible
+	 * (probe time or under rtnl_lock).
+	 */
 	u8			vlan_filtering:1;
 
 	/* Managed by DSA on user ports and by drivers on CPU and DSA ports */
@@ -333,6 +337,10 @@ struct dsa_switch {
 	struct dsa_switch_tree	*dst;
 	unsigned int		index;
 
+	/* Warning: the following bit fields are not atomic, and updating them
+	 * can only be done from code paths where concurrency is not possible
+	 * (probe time or under rtnl_lock).
+	 */
 	u32			setup:1;
 
 	/* Disallow bridge core from requesting different VLAN awareness
-- 
cgit v1.2.3


From 007747a984ea5e895b7d8b056b24ebf431e1e71d Mon Sep 17 00:00:00 2001
From: Miroslav Lichvar <mlichvar@redhat.com>
Date: Wed, 5 Jan 2022 11:33:26 +0100
Subject: net: fix SOF_TIMESTAMPING_BIND_PHC to work with multiple sockets

When multiple sockets using the SOF_TIMESTAMPING_BIND_PHC flag received
a packet with a hardware timestamp (e.g. multiple PTP instances in
different PTP domains using the UDPv4/v6 multicast or L2 transport),
the timestamps received on some sockets were corrupted due to repeated
conversion of the same timestamp (by the same or different vclocks).

Fix ptp_convert_timestamp() to not modify the shared skb timestamp
and return the converted timestamp as a ktime_t instead. If the
conversion fails, return 0 to not confuse the application with
timestamps corresponding to an unexpected PHC.

Fixes: d7c088265588 ("net: socket: support hardware timestamp conversion to PHC bound")
Signed-off-by: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Yangbo Lu <yangbo.lu@nxp.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ptp_clock_kernel.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 2e5565067355..554454cb8693 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -351,15 +351,17 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index);
  *
  * @hwtstamps:    skb_shared_hwtstamps structure pointer
  * @vclock_index: phc index of ptp vclock.
+ *
+ * Returns converted timestamp, or 0 on error.
  */
-void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps,
-			   int vclock_index);
+ktime_t ptp_convert_timestamp(const struct skb_shared_hwtstamps *hwtstamps,
+			      int vclock_index);
 #else
 static inline int ptp_get_vclocks_index(int pclock_index, int **vclock_index)
 { return 0; }
-static inline void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps,
-					 int vclock_index)
-{ }
+static inline ktime_t ptp_convert_timestamp(const struct skb_shared_hwtstamps *hwtstamps,
+					    int vclock_index)
+{ return 0; }
 
 #endif
 
-- 
cgit v1.2.3


From eac1b93c14d645ef147b049ace0d5230df755548 Mon Sep 17 00:00:00 2001
From: Coco Li <lixiaoyan@google.com>
Date: Wed, 5 Jan 2022 02:48:38 -0800
Subject: gro: add ability to control gro max packet size

Eric Dumazet suggested to allow users to modify max GRO packet size.

We have seen GRO being disabled by users of appliances (such as
wifi access points) because of claimed bufferbloat issues,
or some work arounds in sch_cake, to split GRO/GSO packets.

Instead of disabling GRO completely, one can chose to limit
the maximum packet size of GRO packets, depending on their
latency constraints.

This patch adds a per device gro_max_size attribute
that can be changed with ip link command.

ip link set dev eth0 gro_max_size 16000

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Coco Li <lixiaoyan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h    | 11 +++++++++++
 include/uapi/linux/if_link.h |  1 +
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6f99c8f51b60..3213c7227b59 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1942,6 +1942,8 @@ enum netdev_ml_priv_type {
  *			dev->addr_list_lock.
  *	@unlink_list:	As netif_addr_lock() can be called recursively,
  *			keep a list of interfaces to be deleted.
+ *	@gro_max_size:	Maximum size of aggregated packet in generic
+ *			receive offload (GRO)
  *
  *	@dev_addr_shadow:	Copy of @dev_addr to catch direct writes.
  *	@linkwatch_dev_tracker:	refcount tracker used by linkwatch.
@@ -2131,6 +2133,8 @@ struct net_device {
 	struct bpf_prog __rcu	*xdp_prog;
 	unsigned long		gro_flush_timeout;
 	int			napi_defer_hard_irqs;
+#define GRO_MAX_SIZE		65536
+	unsigned int		gro_max_size;
 	rx_handler_func_t __rcu	*rx_handler;
 	void __rcu		*rx_handler_data;
 
@@ -4806,6 +4810,13 @@ static inline void netif_set_gso_max_segs(struct net_device *dev,
 	WRITE_ONCE(dev->gso_max_segs, segs);
 }
 
+static inline void netif_set_gro_max_size(struct net_device *dev,
+					  unsigned int size)
+{
+	/* This pairs with the READ_ONCE() in skb_gro_receive() */
+	WRITE_ONCE(dev->gro_max_size, size);
+}
+
 static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol,
 					int pulled_hlen, u16 mac_offset,
 					int mac_len)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 4ac53b30b6dc..6218f93f5c1a 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -347,6 +347,7 @@ enum {
 	 */
 	IFLA_PARENT_DEV_NAME,
 	IFLA_PARENT_DEV_BUS_NAME,
+	IFLA_GRO_MAX_SIZE,
 
 	__IFLA_MAX
 };
-- 
cgit v1.2.3


From 72279d17df54d5e4e7910b39c61a3f3464e36633 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 5 Jan 2022 14:59:53 -0800
Subject: Bluetooth: hci_event: Rework hci_inquiry_result_with_rssi_evt

This rework the handling of hci_inquiry_result_with_rssi_evt to not use
a union to represent the different inquiry responses.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Tested-by: Soenke Huster <soenke.huster@eknoes.de>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index e2b06bb79e2e..35c073d44ec5 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -2233,11 +2233,7 @@ struct inquiry_info_rssi_pscan {
 } __packed;
 struct hci_ev_inquiry_result_rssi {
 	__u8     num;
-	struct inquiry_info_rssi info[];
-} __packed;
-struct hci_ev_inquiry_result_rssi_pscan {
-	__u8     num;
-	struct inquiry_info_rssi_pscan info[];
+	__u8     data[];
 } __packed;
 
 #define HCI_EV_REMOTE_EXT_FEATURES	0x23
-- 
cgit v1.2.3


From 3809fe479861194e310c23ed48b010c7c0f72d22 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Thu, 16 Dec 2021 10:21:57 +0100
Subject: HID: address kernel-doc warnings

The command ./scripts/kernel-doc -none include/linux/hid.h reports:

  include/linux/hid.h:818: warning: cannot understand function prototype: 'struct hid_ll_driver '
  include/linux/hid.h:1135: warning: expecting prototype for hid_may_wakeup(). Prototype was for hid_hw_may_wakeup() instead

Address those kernel-doc warnings.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/hid.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index f453be385bd4..aaf89a8a836c 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -788,7 +788,7 @@ struct hid_driver {
 	container_of(pdrv, struct hid_driver, driver)
 
 /**
- * hid_ll_driver - low level driver callbacks
+ * struct hid_ll_driver - low level driver callbacks
  * @start: called on probe to start the device
  * @stop: called on remove
  * @open: called by input layer on open
@@ -1158,7 +1158,7 @@ static inline int hid_hw_idle(struct hid_device *hdev, int report, int idle,
 }
 
 /**
- * hid_may_wakeup - return if the hid device may act as a wakeup source during system-suspend
+ * hid_hw_may_wakeup - return if the hid device may act as a wakeup source during system-suspend
  *
  * @hdev: hid device
  */
-- 
cgit v1.2.3


From 36dacddbf0bdba86cd00f066b4d724157eeb63f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dirk=20M=C3=BCller?= <dmueller@suse.de>
Date: Wed, 5 Jan 2022 17:38:47 +0100
Subject: lib/raid6: Use strict priority ranking for pq gen() benchmarking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On x86_64, currently 3 variants of AVX512, 3 variants of AVX2
and 3 variants of SSE2 are benchmarked on initialization, taking
between 144-153 jiffies. Testing across a hardware pool of
various generations of intel cpus I could not find a single
case where SSE2 won over AVX2 or AVX512. There are cases where
AVX2 wins over AVX512 however.

Change "prefer" into an integer priority field (similar to
how recov selection works) to have more than one ranking level
available, which is backwards compatible with existing behavior.

Give AVX2/512 variants higher priority over SSE2 in order to skip
SSE testing when AVX is available. in a AVX2/x86_64/HZ=250 case this
saves in the order of 200ms of initialization time.

Signed-off-by: Dirk Müller <dmueller@suse.de>
Acked-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Song Liu <song@kernel.org>
---
 include/linux/raid/pq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 154e954b711d..d6e5a1feb947 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -81,7 +81,7 @@ struct raid6_calls {
 	void (*xor_syndrome)(int, int, int, size_t, void **);
 	int  (*valid)(void);	/* Returns 1 if this routine set is usable */
 	const char *name;	/* Name of this routine set */
-	int prefer;		/* Has special performance attribute */
+	int priority;		/* Relative priority ranking if non-zero */
 };
 
 /* Selected algorithm */
-- 
cgit v1.2.3


From 07f910f9b7295b6a28b337fedb56e612684c5659 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 4 Oct 2021 14:46:50 +0100
Subject: mm: Remove slab from struct page

All members of struct slab can now be removed from struct page.
This shrinks the definition of struct page by 30 LOC, making
it easier to understand.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/mm_types.h   | 28 ----------------------------
 include/linux/page-flags.h | 37 -------------------------------------
 2 files changed, 65 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 1ae3537c7920..646f3ed4f6df 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -118,31 +118,6 @@ struct page {
 				atomic_long_t pp_frag_count;
 			};
 		};
-		struct {	/* slab, slob and slub */
-			union {
-				struct list_head slab_list;
-				struct {	/* Partial pages */
-					struct page *next;
-#ifdef CONFIG_64BIT
-					int pages;	/* Nr of pages left */
-#else
-					short int pages;
-#endif
-				};
-			};
-			struct kmem_cache *slab_cache; /* not slob */
-			/* Double-word boundary */
-			void *freelist;		/* first free object */
-			union {
-				void *s_mem;	/* slab: first object */
-				unsigned long counters;		/* SLUB */
-				struct {			/* SLUB */
-					unsigned inuse:16;
-					unsigned objects:15;
-					unsigned frozen:1;
-				};
-			};
-		};
 		struct {	/* Tail pages of compound page */
 			unsigned long compound_head;	/* Bit zero is set */
 
@@ -206,9 +181,6 @@ struct page {
 		 * which are currently stored here.
 		 */
 		unsigned int page_type;
-
-		unsigned int active;		/* SLAB */
-		int units;			/* SLOB */
 	};
 
 	/* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index b5f14d581113..1b08e33265fa 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -909,43 +909,6 @@ extern bool is_free_buddy_page(struct page *page);
 
 __PAGEFLAG(Isolated, isolated, PF_ANY);
 
-/*
- * If network-based swap is enabled, sl*b must keep track of whether pages
- * were allocated from pfmemalloc reserves.
- */
-static inline int PageSlabPfmemalloc(struct page *page)
-{
-	VM_BUG_ON_PAGE(!PageSlab(page), page);
-	return PageActive(page);
-}
-
-/*
- * A version of PageSlabPfmemalloc() for opportunistic checks where the page
- * might have been freed under us and not be a PageSlab anymore.
- */
-static inline int __PageSlabPfmemalloc(struct page *page)
-{
-	return PageActive(page);
-}
-
-static inline void SetPageSlabPfmemalloc(struct page *page)
-{
-	VM_BUG_ON_PAGE(!PageSlab(page), page);
-	SetPageActive(page);
-}
-
-static inline void __ClearPageSlabPfmemalloc(struct page *page)
-{
-	VM_BUG_ON_PAGE(!PageSlab(page), page);
-	__ClearPageActive(page);
-}
-
-static inline void ClearPageSlabPfmemalloc(struct page *page)
-{
-	VM_BUG_ON_PAGE(!PageSlab(page), page);
-	ClearPageActive(page);
-}
-
 #ifdef CONFIG_MMU
 #define __PG_MLOCKED		(1UL << PG_mlocked)
 #else
-- 
cgit v1.2.3


From 703f7066f40599c290babdb79dd61319264987e9 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 7 Dec 2021 13:17:33 +0100
Subject: random: remove unused irq_flags argument from
 add_interrupt_randomness()

Since commit
   ee3e00e9e7101 ("random: use registers from interrupted code for CPU's w/o a cycle counter")

the irq_flags argument is no longer used.

Remove unused irq_flags.

Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: linux-hyperv@vger.kernel.org
Cc: x86@kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 include/linux/random.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/random.h b/include/linux/random.h
index f45b8be3e3c4..c45b2693e51f 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -35,7 +35,7 @@ static inline void add_latent_entropy(void) {}
 
 extern void add_input_randomness(unsigned int type, unsigned int code,
 				 unsigned int value) __latent_entropy;
-extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy;
+extern void add_interrupt_randomness(int irq) __latent_entropy;
 
 extern void get_random_bytes(void *buf, int nbytes);
 extern int wait_for_random_bytes(void);
-- 
cgit v1.2.3


From 6048fdcc5f269c7f31d774c295ce59081b36e6f9 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 22 Dec 2021 14:56:58 +0100
Subject: lib/crypto: blake2s: include as built-in

In preparation for using blake2s in the RNG, we change the way that it
is wired-in to the build system. Instead of using ifdefs to select the
right symbol, we use weak symbols. And because ARM doesn't need the
generic implementation, we make the generic one default only if an arch
library doesn't need it already, and then have arch libraries that do
need it opt-in. So that the arch libraries can remain tristate rather
than bool, we then split the shash part from the glue code.

Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: linux-kbuild@vger.kernel.org
Cc: linux-crypto@vger.kernel.org
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 include/crypto/internal/blake2s.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h
index 8e50d487500f..d39cfa0d333e 100644
--- a/include/crypto/internal/blake2s.h
+++ b/include/crypto/internal/blake2s.h
@@ -11,11 +11,11 @@
 #include <crypto/internal/hash.h>
 #include <linux/string.h>
 
-void blake2s_compress_generic(struct blake2s_state *state,const u8 *block,
+void blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
 			      size_t nblocks, const u32 inc);
 
-void blake2s_compress_arch(struct blake2s_state *state,const u8 *block,
-			   size_t nblocks, const u32 inc);
+void blake2s_compress(struct blake2s_state *state, const u8 *block,
+		      size_t nblocks, const u32 inc);
 
 bool blake2s_selftest(void);
 
-- 
cgit v1.2.3


From 96562f286884e2db89c74215b199a1084b5fb7f7 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Fri, 31 Dec 2021 09:26:08 +0100
Subject: random: early initialization of ChaCha constants

Previously, the ChaCha constants for the primary pool were only
initialized in crng_initialize_primary(), called by rand_initialize().
However, some randomness is actually extracted from the primary pool
beforehand, e.g. by kmem_cache_create(). Therefore, statically
initialize the ChaCha constants for the primary pool.

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: <linux-crypto@vger.kernel.org>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 include/crypto/chacha.h | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h
index dabaee698718..b3ea73b81944 100644
--- a/include/crypto/chacha.h
+++ b/include/crypto/chacha.h
@@ -47,12 +47,19 @@ static inline void hchacha_block(const u32 *state, u32 *out, int nrounds)
 		hchacha_block_generic(state, out, nrounds);
 }
 
+enum chacha_constants { /* expand 32-byte k */
+	CHACHA_CONSTANT_EXPA = 0x61707865U,
+	CHACHA_CONSTANT_ND_3 = 0x3320646eU,
+	CHACHA_CONSTANT_2_BY = 0x79622d32U,
+	CHACHA_CONSTANT_TE_K = 0x6b206574U
+};
+
 static inline void chacha_init_consts(u32 *state)
 {
-	state[0]  = 0x61707865; /* "expa" */
-	state[1]  = 0x3320646e; /* "nd 3" */
-	state[2]  = 0x79622d32; /* "2-by" */
-	state[3]  = 0x6b206574; /* "te k" */
+	state[0]  = CHACHA_CONSTANT_EXPA;
+	state[1]  = CHACHA_CONSTANT_ND_3;
+	state[2]  = CHACHA_CONSTANT_2_BY;
+	state[3]  = CHACHA_CONSTANT_TE_K;
 }
 
 void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv);
-- 
cgit v1.2.3


From 79b60ca83b6fa63ef307d2edcc77ee6581da8971 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Sun, 12 Dec 2021 14:51:27 +0200
Subject: net/mlx5: Introduce API for bulk request and release of IRQs

Currently IRQs are requested one by one. To balance spreading IRQs
among cpus using such scheme requires remembering cpu mask for the
cpus used for a given device. This complicates the IRQ allocation
scheme in subsequent patch.

Hence, prepare the code for bulk IRQs allocation. This enables
spreading IRQs among cpus in subsequent patch.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/eq.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h
index ea3ff5a8ced3..3705a382276b 100644
--- a/include/linux/mlx5/eq.h
+++ b/include/linux/mlx5/eq.h
@@ -9,13 +9,13 @@
 #define MLX5_NUM_SPARE_EQE (0x80)
 
 struct mlx5_eq;
+struct mlx5_irq;
 struct mlx5_core_dev;
 
 struct mlx5_eq_param {
-	u8             irq_index;
 	int            nent;
 	u64            mask[4];
-	cpumask_var_t  affinity;
+	struct mlx5_irq *irq;
 };
 
 struct mlx5_eq *
-- 
cgit v1.2.3


From 91a760b26926265a60c77ddf016529bcf3e17a04 Mon Sep 17 00:00:00 2001
From: Menglong Dong <imagedong@tencent.com>
Date: Thu, 6 Jan 2022 21:20:20 +0800
Subject: net: bpf: Handle return value of
 BPF_CGROUP_RUN_PROG_INET{4,6}_POST_BIND()

The return value of BPF_CGROUP_RUN_PROG_INET{4,6}_POST_BIND() in
__inet_bind() is not handled properly. While the return value
is non-zero, it will set inet_saddr and inet_rcv_saddr to 0 and
exit:

	err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
	if (err) {
		inet->inet_saddr = inet->inet_rcv_saddr = 0;
		goto out_release_sock;
	}

Let's take UDP for example and see what will happen. For UDP
socket, it will be added to 'udp_prot.h.udp_table->hash' and
'udp_prot.h.udp_table->hash2' after the sk->sk_prot->get_port()
called success. If 'inet->inet_rcv_saddr' is specified here,
then 'sk' will be in the 'hslot2' of 'hash2' that it don't belong
to (because inet_saddr is changed to 0), and UDP packet received
will not be passed to this sock. If 'inet->inet_rcv_saddr' is not
specified here, the sock will work fine, as it can receive packet
properly, which is wired, as the 'bind()' is already failed.

To undo the get_port() operation, introduce the 'put_port' field
for 'struct proto'. For TCP proto, it is inet_put_port(); For UDP
proto, it is udp_lib_unhash(); For icmp proto, it is
ping_unhash().

Therefore, after sys_bind() fail caused by
BPF_CGROUP_RUN_PROG_INET4_POST_BIND(), it will be unbinded, which
means that it can try to be binded to another port.

Signed-off-by: Menglong Dong <imagedong@tencent.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220106132022.3470772-2-imagedong@tencent.com
---
 include/net/sock.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 7b4b4237e6e0..ff9b508d9c5f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1209,6 +1209,7 @@ struct proto {
 	void			(*unhash)(struct sock *sk);
 	void			(*rehash)(struct sock *sk);
 	int			(*get_port)(struct sock *sk, unsigned short snum);
+	void			(*put_port)(struct sock *sk);
 #ifdef CONFIG_BPF_SYSCALL
 	int			(*psock_update_sk_prot)(struct sock *sk,
 							struct sk_psock *psock,
-- 
cgit v1.2.3


From 51d04bcfb82a005d38b6f1011dc04a810d359aea Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Mon, 20 Dec 2021 19:33:18 +0000
Subject: dt-bindings: clk/ingenic: Add MDMA and BDMA clocks

The Ingenic JZ4760 and JZ4770 both have an extra DMA core named BDMA
dedicated to the NAND and BCH controller, but which can also do
memory-to-memory transfers. The JZ4760 additionally has a DMA core named
MDMA dedicated to memory-to-memory transfers. The programming manual for
the JZ4770 does have a bit for a MDMA clock, but does not seem to have
the hardware wired in.

Add macros for the MDMA and BDMA clocks to the dt-bindings include
files, so that they can be used within Device Tree files.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Link: https://lore.kernel.org/r/20211220193319.114974-2-paul@crapouillou.net
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/dt-bindings/clock/ingenic,jz4760-cgu.h | 2 ++
 include/dt-bindings/clock/ingenic,jz4770-cgu.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/ingenic,jz4760-cgu.h b/include/dt-bindings/clock/ingenic,jz4760-cgu.h
index 4bb2e19c4743..9fb04ebac6de 100644
--- a/include/dt-bindings/clock/ingenic,jz4760-cgu.h
+++ b/include/dt-bindings/clock/ingenic,jz4760-cgu.h
@@ -50,5 +50,7 @@
 #define JZ4760_CLK_LPCLK_DIV	41
 #define JZ4760_CLK_TVE		42
 #define JZ4760_CLK_LPCLK	43
+#define JZ4760_CLK_MDMA		44
+#define JZ4760_CLK_BDMA		45
 
 #endif /* __DT_BINDINGS_CLOCK_JZ4760_CGU_H__ */
diff --git a/include/dt-bindings/clock/ingenic,jz4770-cgu.h b/include/dt-bindings/clock/ingenic,jz4770-cgu.h
index d68a7695a1f8..0b475e8ae321 100644
--- a/include/dt-bindings/clock/ingenic,jz4770-cgu.h
+++ b/include/dt-bindings/clock/ingenic,jz4770-cgu.h
@@ -54,5 +54,6 @@
 #define JZ4770_CLK_OTG_PHY	45
 #define JZ4770_CLK_EXT512	46
 #define JZ4770_CLK_RTC		47
+#define JZ4770_CLK_BDMA		48
 
 #endif /* __DT_BINDINGS_CLOCK_JZ4770_CGU_H__ */
-- 
cgit v1.2.3


From 3663f26b389b3951426971b44bb9312fdff0efec Mon Sep 17 00:00:00 2001
From: Ajit Kumar Pandey <AjitKumar.Pandey@amd.com>
Date: Sun, 12 Dec 2021 23:35:24 +0530
Subject: drivers: acpi: acpi_apd: Remove unused device property "is-rv"

Initially "is-rv" device property is added for 48MHz fixed clock
support on Raven or RV architecture. It's unused now as we moved
to pci device_id based selection to extend such support on other
architectures. This change removed unused code from acpi driver.

Signed-off-by: Ajit Kumar Pandey <AjitKumar.Pandey@amd.com>
Reviewed-by: Mario Limonciello <Mario.Limonciello@amd.com>
Link: https://lore.kernel.org/r/20211212180527.1641362-3-AjitKumar.Pandey@amd.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/linux/platform_data/clk-fch.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/platform_data/clk-fch.h b/include/linux/platform_data/clk-fch.h
index b9f682459f08..850ca776156d 100644
--- a/include/linux/platform_data/clk-fch.h
+++ b/include/linux/platform_data/clk-fch.h
@@ -12,7 +12,6 @@
 
 struct fch_clk_data {
 	void __iomem *base;
-	u32 is_rv;
 };
 
 #endif /* __CLK_FCH_H */
-- 
cgit v1.2.3


From 7fdb98e8a768b3ccc05494d3ea4436047f512b9d Mon Sep 17 00:00:00 2001
From: Ajit Kumar Pandey <AjitKumar.Pandey@amd.com>
Date: Sun, 12 Dec 2021 23:35:25 +0530
Subject: ACPI: APD: Add a fmw property clk-name

Add a new device property to fetch clk-name from firmware.

Signed-off-by: Ajit Kumar Pandey <AjitKumar.Pandey@amd.com>
Reviewed-by: Mario Limonciello <Mario.Limonciello@amd.com>
Link: https://lore.kernel.org/r/20211212180527.1641362-4-AjitKumar.Pandey@amd.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/linux/platform_data/clk-fch.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/platform_data/clk-fch.h b/include/linux/platform_data/clk-fch.h
index 850ca776156d..11a2a23fd9b2 100644
--- a/include/linux/platform_data/clk-fch.h
+++ b/include/linux/platform_data/clk-fch.h
@@ -12,6 +12,7 @@
 
 struct fch_clk_data {
 	void __iomem *base;
+	char *name;
 };
 
 #endif /* __CLK_FCH_H */
-- 
cgit v1.2.3


From 4470c830f9791203e7514c6b4d3a0df194e3ee0d Mon Sep 17 00:00:00 2001
From: Sam Shih <sam.shih@mediatek.com>
Date: Fri, 17 Dec 2021 20:11:47 +0800
Subject: clk: mediatek: add mt7986 clock IDs

Add MT7986 clock dt-bindings, include topckgen, apmixedsys,
infracfg, and ethernet subsystem clocks.

Signed-off-by: Sam Shih <sam.shih@mediatek.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20211217121148.6753-3-sam.shih@mediatek.com
Reviewed-by: Ryder Lee <ryder.lee@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/dt-bindings/clock/mt7986-clk.h | 169 +++++++++++++++++++++++++++++++++
 1 file changed, 169 insertions(+)
 create mode 100644 include/dt-bindings/clock/mt7986-clk.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/mt7986-clk.h b/include/dt-bindings/clock/mt7986-clk.h
new file mode 100644
index 000000000000..5a9b169324b0
--- /dev/null
+++ b/include/dt-bindings/clock/mt7986-clk.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2021 MediaTek Inc.
+ * Author: Sam Shih <sam.shih@mediatek.com>
+ */
+
+#ifndef _DT_BINDINGS_CLK_MT7986_H
+#define _DT_BINDINGS_CLK_MT7986_H
+
+/* APMIXEDSYS */
+
+#define CLK_APMIXED_ARMPLL		0
+#define CLK_APMIXED_NET2PLL		1
+#define CLK_APMIXED_MMPLL		2
+#define CLK_APMIXED_SGMPLL		3
+#define CLK_APMIXED_WEDMCUPLL		4
+#define CLK_APMIXED_NET1PLL		5
+#define CLK_APMIXED_MPLL		6
+#define CLK_APMIXED_APLL2		7
+
+/* TOPCKGEN */
+
+#define CLK_TOP_XTAL			0
+#define CLK_TOP_XTAL_D2			1
+#define CLK_TOP_RTC_32K			2
+#define CLK_TOP_RTC_32P7K		3
+#define CLK_TOP_MPLL_D2			4
+#define CLK_TOP_MPLL_D4			5
+#define CLK_TOP_MPLL_D8			6
+#define CLK_TOP_MPLL_D8_D2		7
+#define CLK_TOP_MPLL_D3_D2		8
+#define CLK_TOP_MMPLL_D2		9
+#define CLK_TOP_MMPLL_D4		10
+#define CLK_TOP_MMPLL_D8		11
+#define CLK_TOP_MMPLL_D8_D2		12
+#define CLK_TOP_MMPLL_D3_D8		13
+#define CLK_TOP_MMPLL_U2PHY		14
+#define CLK_TOP_APLL2_D4		15
+#define CLK_TOP_NET1PLL_D4		16
+#define CLK_TOP_NET1PLL_D5		17
+#define CLK_TOP_NET1PLL_D5_D2		18
+#define CLK_TOP_NET1PLL_D5_D4		19
+#define CLK_TOP_NET1PLL_D8_D2		20
+#define CLK_TOP_NET1PLL_D8_D4		21
+#define CLK_TOP_NET2PLL_D4		22
+#define CLK_TOP_NET2PLL_D4_D2		23
+#define CLK_TOP_NET2PLL_D3_D2		24
+#define CLK_TOP_WEDMCUPLL_D5_D2		25
+#define CLK_TOP_NFI1X_SEL		26
+#define CLK_TOP_SPINFI_SEL		27
+#define CLK_TOP_SPI_SEL			28
+#define CLK_TOP_SPIM_MST_SEL		29
+#define CLK_TOP_UART_SEL		30
+#define CLK_TOP_PWM_SEL			31
+#define CLK_TOP_I2C_SEL			32
+#define CLK_TOP_PEXTP_TL_SEL		33
+#define CLK_TOP_EMMC_250M_SEL		34
+#define CLK_TOP_EMMC_416M_SEL		35
+#define CLK_TOP_F_26M_ADC_SEL		36
+#define CLK_TOP_DRAMC_SEL		37
+#define CLK_TOP_DRAMC_MD32_SEL		38
+#define CLK_TOP_SYSAXI_SEL		39
+#define CLK_TOP_SYSAPB_SEL		40
+#define CLK_TOP_ARM_DB_MAIN_SEL		41
+#define CLK_TOP_ARM_DB_JTSEL		42
+#define CLK_TOP_NETSYS_SEL		43
+#define CLK_TOP_NETSYS_500M_SEL		44
+#define CLK_TOP_NETSYS_MCU_SEL		45
+#define CLK_TOP_NETSYS_2X_SEL		46
+#define CLK_TOP_SGM_325M_SEL		47
+#define CLK_TOP_SGM_REG_SEL		48
+#define CLK_TOP_A1SYS_SEL		49
+#define CLK_TOP_CONN_MCUSYS_SEL		50
+#define CLK_TOP_EIP_B_SEL		51
+#define CLK_TOP_PCIE_PHY_SEL		52
+#define CLK_TOP_USB3_PHY_SEL		53
+#define CLK_TOP_F26M_SEL		54
+#define CLK_TOP_AUD_L_SEL		55
+#define CLK_TOP_A_TUNER_SEL		56
+#define CLK_TOP_U2U3_SEL		57
+#define CLK_TOP_U2U3_SYS_SEL		58
+#define CLK_TOP_U2U3_XHCI_SEL		59
+#define CLK_TOP_DA_U2_REFSEL		60
+#define CLK_TOP_DA_U2_CK_1P_SEL		61
+#define CLK_TOP_AP2CNN_HOST_SEL		62
+#define CLK_TOP_JTAG			63
+
+/* INFRACFG */
+
+#define CLK_INFRA_SYSAXI_D2		0
+#define CLK_INFRA_UART0_SEL		1
+#define CLK_INFRA_UART1_SEL		2
+#define CLK_INFRA_UART2_SEL		3
+#define CLK_INFRA_SPI0_SEL		4
+#define CLK_INFRA_SPI1_SEL		5
+#define CLK_INFRA_PWM1_SEL		6
+#define CLK_INFRA_PWM2_SEL		7
+#define CLK_INFRA_PWM_BSEL		8
+#define CLK_INFRA_PCIE_SEL		9
+#define CLK_INFRA_GPT_STA		10
+#define CLK_INFRA_PWM_HCK		11
+#define CLK_INFRA_PWM_STA		12
+#define CLK_INFRA_PWM1_CK		13
+#define CLK_INFRA_PWM2_CK		14
+#define CLK_INFRA_CQ_DMA_CK		15
+#define CLK_INFRA_EIP97_CK		16
+#define CLK_INFRA_AUD_BUS_CK		17
+#define CLK_INFRA_AUD_26M_CK		18
+#define CLK_INFRA_AUD_L_CK		19
+#define CLK_INFRA_AUD_AUD_CK		20
+#define CLK_INFRA_AUD_EG2_CK		21
+#define CLK_INFRA_DRAMC_26M_CK		22
+#define CLK_INFRA_DBG_CK		23
+#define CLK_INFRA_AP_DMA_CK		24
+#define CLK_INFRA_SEJ_CK		25
+#define CLK_INFRA_SEJ_13M_CK		26
+#define CLK_INFRA_THERM_CK		27
+#define CLK_INFRA_I2C0_CK		28
+#define CLK_INFRA_UART0_CK		29
+#define CLK_INFRA_UART1_CK		30
+#define CLK_INFRA_UART2_CK		31
+#define CLK_INFRA_NFI1_CK		32
+#define CLK_INFRA_SPINFI1_CK		33
+#define CLK_INFRA_NFI_HCK_CK		34
+#define CLK_INFRA_SPI0_CK		35
+#define CLK_INFRA_SPI1_CK		36
+#define CLK_INFRA_SPI0_HCK_CK		37
+#define CLK_INFRA_SPI1_HCK_CK		38
+#define CLK_INFRA_FRTC_CK		39
+#define CLK_INFRA_MSDC_CK		40
+#define CLK_INFRA_MSDC_HCK_CK		41
+#define CLK_INFRA_MSDC_133M_CK		42
+#define CLK_INFRA_MSDC_66M_CK		43
+#define CLK_INFRA_ADC_26M_CK		44
+#define CLK_INFRA_ADC_FRC_CK		45
+#define CLK_INFRA_FBIST2FPC_CK		46
+#define CLK_INFRA_IUSB_133_CK		47
+#define CLK_INFRA_IUSB_66M_CK		48
+#define CLK_INFRA_IUSB_SYS_CK		49
+#define CLK_INFRA_IUSB_CK		50
+#define CLK_INFRA_IPCIE_CK		51
+#define CLK_INFRA_IPCIE_PIPE_CK		52
+#define CLK_INFRA_IPCIER_CK		53
+#define CLK_INFRA_IPCIEB_CK		54
+#define CLK_INFRA_TRNG_CK		55
+
+/* SGMIISYS_0 */
+
+#define CLK_SGMII0_TX250M_EN		0
+#define CLK_SGMII0_RX250M_EN		1
+#define CLK_SGMII0_CDR_REF		2
+#define CLK_SGMII0_CDR_FB		3
+
+/* SGMIISYS_1 */
+
+#define CLK_SGMII1_TX250M_EN		0
+#define CLK_SGMII1_RX250M_EN		1
+#define CLK_SGMII1_CDR_REF		2
+#define CLK_SGMII1_CDR_FB		3
+
+/* ETHSYS */
+
+#define CLK_ETH_FE_EN			0
+#define CLK_ETH_GP2_EN			1
+#define CLK_ETH_GP1_EN			2
+#define CLK_ETH_WOCPU1_EN		3
+#define CLK_ETH_WOCPU0_EN		4
+
+#endif /* _DT_BINDINGS_CLK_MT7986_H */
-- 
cgit v1.2.3


From 850cba069c266d6f31b81c5a199052a3482a63fc Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sun, 31 Oct 2021 12:58:05 +0000
Subject: cachefiles: Delete the cachefiles driver pending rewrite

Delete the code from the cachefiles driver to make it easier to rewrite and
resubmit in a logical manner.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819577641.215744.12718114397770666596.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906883770.143852.4149714614981373410.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967076066.1823006.7175712134577687753.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021483619.640689.7586546280515844702.stgit@warthog.procyon.org.uk/ # v4
---
 include/trace/events/cachefiles.h | 321 --------------------------------------
 1 file changed, 321 deletions(-)
 delete mode 100644 include/trace/events/cachefiles.h

(limited to 'include')

diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
deleted file mode 100644
index 920b6a303d60..000000000000
--- a/include/trace/events/cachefiles.h
+++ /dev/null
@@ -1,321 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* CacheFiles tracepoints
- *
- * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM cachefiles
-
-#if !defined(_TRACE_CACHEFILES_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_CACHEFILES_H
-
-#include <linux/tracepoint.h>
-
-/*
- * Define enums for tracing information.
- */
-#ifndef __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY
-#define __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY
-
-enum cachefiles_obj_ref_trace {
-	cachefiles_obj_put_wait_retry = fscache_obj_ref__nr_traces,
-	cachefiles_obj_put_wait_timeo,
-	cachefiles_obj_ref__nr_traces
-};
-
-#endif
-
-/*
- * Define enum -> string mappings for display.
- */
-#define cachefiles_obj_kill_traces				\
-	EM(FSCACHE_OBJECT_IS_STALE,	"stale")		\
-	EM(FSCACHE_OBJECT_NO_SPACE,	"no_space")		\
-	EM(FSCACHE_OBJECT_WAS_RETIRED,	"was_retired")		\
-	E_(FSCACHE_OBJECT_WAS_CULLED,	"was_culled")
-
-#define cachefiles_obj_ref_traces					\
-	EM(fscache_obj_get_add_to_deps,		"GET add_to_deps")	\
-	EM(fscache_obj_get_queue,		"GET queue")		\
-	EM(fscache_obj_put_alloc_fail,		"PUT alloc_fail")	\
-	EM(fscache_obj_put_attach_fail,		"PUT attach_fail")	\
-	EM(fscache_obj_put_drop_obj,		"PUT drop_obj")		\
-	EM(fscache_obj_put_enq_dep,		"PUT enq_dep")		\
-	EM(fscache_obj_put_queue,		"PUT queue")		\
-	EM(fscache_obj_put_work,		"PUT work")		\
-	EM(cachefiles_obj_put_wait_retry,	"PUT wait_retry")	\
-	E_(cachefiles_obj_put_wait_timeo,	"PUT wait_timeo")
-
-/*
- * Export enum symbols via userspace.
- */
-#undef EM
-#undef E_
-#define EM(a, b) TRACE_DEFINE_ENUM(a);
-#define E_(a, b) TRACE_DEFINE_ENUM(a);
-
-cachefiles_obj_kill_traces;
-cachefiles_obj_ref_traces;
-
-/*
- * Now redefine the EM() and E_() macros to map the enums to the strings that
- * will be printed in the output.
- */
-#undef EM
-#undef E_
-#define EM(a, b)	{ a, b },
-#define E_(a, b)	{ a, b }
-
-
-TRACE_EVENT(cachefiles_ref,
-	    TP_PROTO(struct cachefiles_object *obj,
-		     struct fscache_cookie *cookie,
-		     enum cachefiles_obj_ref_trace why,
-		     int usage),
-
-	    TP_ARGS(obj, cookie, why, usage),
-
-	    /* Note that obj may be NULL */
-	    TP_STRUCT__entry(
-		    __field(unsigned int,			obj		)
-		    __field(unsigned int,			cookie		)
-		    __field(enum cachefiles_obj_ref_trace,	why		)
-		    __field(int,				usage		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->obj	= obj->fscache.debug_id;
-		    __entry->cookie	= cookie->debug_id;
-		    __entry->usage	= usage;
-		    __entry->why	= why;
-			   ),
-
-	    TP_printk("c=%08x o=%08x u=%d %s",
-		      __entry->cookie, __entry->obj, __entry->usage,
-		      __print_symbolic(__entry->why, cachefiles_obj_ref_traces))
-	    );
-
-TRACE_EVENT(cachefiles_lookup,
-	    TP_PROTO(struct cachefiles_object *obj,
-		     struct dentry *de,
-		     struct inode *inode),
-
-	    TP_ARGS(obj, de, inode),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		obj	)
-		    __field(struct dentry *,		de	)
-		    __field(struct inode *,		inode	)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->obj	= obj->fscache.debug_id;
-		    __entry->de		= de;
-		    __entry->inode	= inode;
-			   ),
-
-	    TP_printk("o=%08x d=%p i=%p",
-		      __entry->obj, __entry->de, __entry->inode)
-	    );
-
-TRACE_EVENT(cachefiles_mkdir,
-	    TP_PROTO(struct cachefiles_object *obj,
-		     struct dentry *de, int ret),
-
-	    TP_ARGS(obj, de, ret),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		obj	)
-		    __field(struct dentry *,		de	)
-		    __field(int,			ret	)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->obj	= obj->fscache.debug_id;
-		    __entry->de		= de;
-		    __entry->ret	= ret;
-			   ),
-
-	    TP_printk("o=%08x d=%p r=%u",
-		      __entry->obj, __entry->de, __entry->ret)
-	    );
-
-TRACE_EVENT(cachefiles_create,
-	    TP_PROTO(struct cachefiles_object *obj,
-		     struct dentry *de, int ret),
-
-	    TP_ARGS(obj, de, ret),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		obj	)
-		    __field(struct dentry *,		de	)
-		    __field(int,			ret	)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->obj	= obj->fscache.debug_id;
-		    __entry->de		= de;
-		    __entry->ret	= ret;
-			   ),
-
-	    TP_printk("o=%08x d=%p r=%u",
-		      __entry->obj, __entry->de, __entry->ret)
-	    );
-
-TRACE_EVENT(cachefiles_unlink,
-	    TP_PROTO(struct cachefiles_object *obj,
-		     struct dentry *de,
-		     enum fscache_why_object_killed why),
-
-	    TP_ARGS(obj, de, why),
-
-	    /* Note that obj may be NULL */
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		obj		)
-		    __field(struct dentry *,		de		)
-		    __field(enum fscache_why_object_killed, why		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->obj	= obj ? obj->fscache.debug_id : UINT_MAX;
-		    __entry->de		= de;
-		    __entry->why	= why;
-			   ),
-
-	    TP_printk("o=%08x d=%p w=%s",
-		      __entry->obj, __entry->de,
-		      __print_symbolic(__entry->why, cachefiles_obj_kill_traces))
-	    );
-
-TRACE_EVENT(cachefiles_rename,
-	    TP_PROTO(struct cachefiles_object *obj,
-		     struct dentry *de,
-		     struct dentry *to,
-		     enum fscache_why_object_killed why),
-
-	    TP_ARGS(obj, de, to, why),
-
-	    /* Note that obj may be NULL */
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		obj		)
-		    __field(struct dentry *,		de		)
-		    __field(struct dentry *,		to		)
-		    __field(enum fscache_why_object_killed, why		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->obj	= obj ? obj->fscache.debug_id : UINT_MAX;
-		    __entry->de		= de;
-		    __entry->to		= to;
-		    __entry->why	= why;
-			   ),
-
-	    TP_printk("o=%08x d=%p t=%p w=%s",
-		      __entry->obj, __entry->de, __entry->to,
-		      __print_symbolic(__entry->why, cachefiles_obj_kill_traces))
-	    );
-
-TRACE_EVENT(cachefiles_mark_active,
-	    TP_PROTO(struct cachefiles_object *obj,
-		     struct dentry *de),
-
-	    TP_ARGS(obj, de),
-
-	    /* Note that obj may be NULL */
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		obj		)
-		    __field(struct dentry *,		de		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->obj	= obj->fscache.debug_id;
-		    __entry->de		= de;
-			   ),
-
-	    TP_printk("o=%08x d=%p",
-		      __entry->obj, __entry->de)
-	    );
-
-TRACE_EVENT(cachefiles_wait_active,
-	    TP_PROTO(struct cachefiles_object *obj,
-		     struct dentry *de,
-		     struct cachefiles_object *xobj),
-
-	    TP_ARGS(obj, de, xobj),
-
-	    /* Note that obj may be NULL */
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		obj		)
-		    __field(unsigned int,		xobj		)
-		    __field(struct dentry *,		de		)
-		    __field(u16,			flags		)
-		    __field(u16,			fsc_flags	)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->obj	= obj->fscache.debug_id;
-		    __entry->de		= de;
-		    __entry->xobj	= xobj->fscache.debug_id;
-		    __entry->flags	= xobj->flags;
-		    __entry->fsc_flags	= xobj->fscache.flags;
-			   ),
-
-	    TP_printk("o=%08x d=%p wo=%08x wf=%x wff=%x",
-		      __entry->obj, __entry->de, __entry->xobj,
-		      __entry->flags, __entry->fsc_flags)
-	    );
-
-TRACE_EVENT(cachefiles_mark_inactive,
-	    TP_PROTO(struct cachefiles_object *obj,
-		     struct dentry *de,
-		     struct inode *inode),
-
-	    TP_ARGS(obj, de, inode),
-
-	    /* Note that obj may be NULL */
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		obj		)
-		    __field(struct dentry *,		de		)
-		    __field(struct inode *,		inode		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->obj	= obj->fscache.debug_id;
-		    __entry->de		= de;
-		    __entry->inode	= inode;
-			   ),
-
-	    TP_printk("o=%08x d=%p i=%p",
-		      __entry->obj, __entry->de, __entry->inode)
-	    );
-
-TRACE_EVENT(cachefiles_mark_buried,
-	    TP_PROTO(struct cachefiles_object *obj,
-		     struct dentry *de,
-		     enum fscache_why_object_killed why),
-
-	    TP_ARGS(obj, de, why),
-
-	    /* Note that obj may be NULL */
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		obj		)
-		    __field(struct dentry *,		de		)
-		    __field(enum fscache_why_object_killed, why		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->obj	= obj ? obj->fscache.debug_id : UINT_MAX;
-		    __entry->de		= de;
-		    __entry->why	= why;
-			   ),
-
-	    TP_printk("o=%08x d=%p w=%s",
-		      __entry->obj, __entry->de,
-		      __print_symbolic(__entry->why, cachefiles_obj_kill_traces))
-	    );
-
-#endif /* _TRACE_CACHEFILES_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
-- 
cgit v1.2.3


From 2cee6fbb7f01bcb25f11ef1439e89a29de4c0c1d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 25 Oct 2021 21:53:44 +0100
Subject: fscache: Remove the contents of the fscache driver, pending rewrite

Remove the code that comprises the fscache driver as it's going to be
substantially rewritten, with the majority of the code being erased in the
rewrite.

A small piece of linux/fscache.h is left as that is #included by a bunch of
network filesystems.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819578724.215744.18210619052245724238.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906884814.143852.6727245089843862889.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967077097.1823006.1377665951499979089.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021485548.640689.13876080567388696162.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache-cache.h  | 548 +-------------------------
 include/linux/fscache.h        | 851 +----------------------------------------
 include/trace/events/fscache.h | 523 -------------------------
 3 files changed, 3 insertions(+), 1919 deletions(-)
 delete mode 100644 include/trace/events/fscache.h

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 8d39491c5f9f..47f21a53ac4b 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 /* General filesystem caching backing cache interface
  *
- * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * NOTE!!! See:
@@ -15,551 +15,5 @@
 #define _LINUX_FSCACHE_CACHE_H
 
 #include <linux/fscache.h>
-#include <linux/sched.h>
-#include <linux/workqueue.h>
-
-#define NR_MAXCACHES BITS_PER_LONG
-
-struct fscache_cache;
-struct fscache_cache_ops;
-struct fscache_object;
-struct fscache_operation;
-
-enum fscache_obj_ref_trace {
-	fscache_obj_get_add_to_deps,
-	fscache_obj_get_queue,
-	fscache_obj_put_alloc_fail,
-	fscache_obj_put_attach_fail,
-	fscache_obj_put_drop_obj,
-	fscache_obj_put_enq_dep,
-	fscache_obj_put_queue,
-	fscache_obj_put_work,
-	fscache_obj_ref__nr_traces
-};
-
-/*
- * cache tag definition
- */
-struct fscache_cache_tag {
-	struct list_head	link;
-	struct fscache_cache	*cache;		/* cache referred to by this tag */
-	unsigned long		flags;
-#define FSCACHE_TAG_RESERVED	0		/* T if tag is reserved for a cache */
-	atomic_t		usage;
-	char			name[];	/* tag name */
-};
-
-/*
- * cache definition
- */
-struct fscache_cache {
-	const struct fscache_cache_ops *ops;
-	struct fscache_cache_tag *tag;		/* tag representing this cache */
-	struct kobject		*kobj;		/* system representation of this cache */
-	struct list_head	link;		/* link in list of caches */
-	size_t			max_index_size;	/* maximum size of index data */
-	char			identifier[36];	/* cache label */
-
-	/* node management */
-	struct work_struct	op_gc;		/* operation garbage collector */
-	struct list_head	object_list;	/* list of data/index objects */
-	struct list_head	op_gc_list;	/* list of ops to be deleted */
-	spinlock_t		object_list_lock;
-	spinlock_t		op_gc_list_lock;
-	atomic_t		object_count;	/* no. of live objects in this cache */
-	struct fscache_object	*fsdef;		/* object for the fsdef index */
-	unsigned long		flags;
-#define FSCACHE_IOERROR		0	/* cache stopped on I/O error */
-#define FSCACHE_CACHE_WITHDRAWN	1	/* cache has been withdrawn */
-};
-
-extern wait_queue_head_t fscache_cache_cleared_wq;
-
-/*
- * operation to be applied to a cache object
- * - retrieval initiation operations are done in the context of the process
- *   that issued them, and not in an async thread pool
- */
-typedef void (*fscache_operation_release_t)(struct fscache_operation *op);
-typedef void (*fscache_operation_processor_t)(struct fscache_operation *op);
-typedef void (*fscache_operation_cancel_t)(struct fscache_operation *op);
-
-enum fscache_operation_state {
-	FSCACHE_OP_ST_BLANK,		/* Op is not yet submitted */
-	FSCACHE_OP_ST_INITIALISED,	/* Op is initialised */
-	FSCACHE_OP_ST_PENDING,		/* Op is blocked from running */
-	FSCACHE_OP_ST_IN_PROGRESS,	/* Op is in progress */
-	FSCACHE_OP_ST_COMPLETE,		/* Op is complete */
-	FSCACHE_OP_ST_CANCELLED,	/* Op has been cancelled */
-	FSCACHE_OP_ST_DEAD		/* Op is now dead */
-};
-
-struct fscache_operation {
-	struct work_struct	work;		/* record for async ops */
-	struct list_head	pend_link;	/* link in object->pending_ops */
-	struct fscache_object	*object;	/* object to be operated upon */
-
-	unsigned long		flags;
-#define FSCACHE_OP_TYPE		0x000f	/* operation type */
-#define FSCACHE_OP_ASYNC	0x0001	/* - async op, processor may sleep for disk */
-#define FSCACHE_OP_MYTHREAD	0x0002	/* - processing is done be issuing thread, not pool */
-#define FSCACHE_OP_WAITING	4	/* cleared when op is woken */
-#define FSCACHE_OP_EXCLUSIVE	5	/* exclusive op, other ops must wait */
-#define FSCACHE_OP_DEC_READ_CNT	6	/* decrement object->n_reads on destruction */
-#define FSCACHE_OP_UNUSE_COOKIE	7	/* call fscache_unuse_cookie() on completion */
-#define FSCACHE_OP_KEEP_FLAGS	0x00f0	/* flags to keep when repurposing an op */
-
-	enum fscache_operation_state state;
-	atomic_t		usage;
-	unsigned		debug_id;	/* debugging ID */
-
-	/* operation processor callback
-	 * - can be NULL if FSCACHE_OP_WAITING is going to be used to perform
-	 *   the op in a non-pool thread */
-	fscache_operation_processor_t processor;
-
-	/* Operation cancellation cleanup (optional) */
-	fscache_operation_cancel_t cancel;
-
-	/* operation releaser */
-	fscache_operation_release_t release;
-};
-
-extern atomic_t fscache_op_debug_id;
-extern void fscache_op_work_func(struct work_struct *work);
-
-extern void fscache_enqueue_operation(struct fscache_operation *);
-extern void fscache_op_complete(struct fscache_operation *, bool);
-extern void fscache_put_operation(struct fscache_operation *);
-extern void fscache_operation_init(struct fscache_cookie *,
-				   struct fscache_operation *,
-				   fscache_operation_processor_t,
-				   fscache_operation_cancel_t,
-				   fscache_operation_release_t);
-
-/*
- * data read operation
- */
-struct fscache_retrieval {
-	struct fscache_operation op;
-	struct fscache_cookie	*cookie;	/* The netfs cookie */
-	struct address_space	*mapping;	/* netfs pages */
-	fscache_rw_complete_t	end_io_func;	/* function to call on I/O completion */
-	void			*context;	/* netfs read context (pinned) */
-	struct list_head	to_do;		/* list of things to be done by the backend */
-	atomic_t		n_pages;	/* number of pages to be retrieved */
-};
-
-typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op,
-					     struct page *page,
-					     gfp_t gfp);
-
-typedef int (*fscache_pages_retrieval_func_t)(struct fscache_retrieval *op,
-					      struct list_head *pages,
-					      unsigned *nr_pages,
-					      gfp_t gfp);
-
-/**
- * fscache_get_retrieval - Get an extra reference on a retrieval operation
- * @op: The retrieval operation to get a reference on
- *
- * Get an extra reference on a retrieval operation.
- */
-static inline
-struct fscache_retrieval *fscache_get_retrieval(struct fscache_retrieval *op)
-{
-	atomic_inc(&op->op.usage);
-	return op;
-}
-
-/**
- * fscache_enqueue_retrieval - Enqueue a retrieval operation for processing
- * @op: The retrieval operation affected
- *
- * Enqueue a retrieval operation for processing by the FS-Cache thread pool.
- */
-static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op)
-{
-	fscache_enqueue_operation(&op->op);
-}
-
-/**
- * fscache_retrieval_complete - Record (partial) completion of a retrieval
- * @op: The retrieval operation affected
- * @n_pages: The number of pages to account for
- */
-static inline void fscache_retrieval_complete(struct fscache_retrieval *op,
-					      int n_pages)
-{
-	if (atomic_sub_return_relaxed(n_pages, &op->n_pages) <= 0)
-		fscache_op_complete(&op->op, false);
-}
-
-/**
- * fscache_put_retrieval - Drop a reference to a retrieval operation
- * @op: The retrieval operation affected
- *
- * Drop a reference to a retrieval operation.
- */
-static inline void fscache_put_retrieval(struct fscache_retrieval *op)
-{
-	fscache_put_operation(&op->op);
-}
-
-/*
- * cached page storage work item
- * - used to do three things:
- *   - batch writes to the cache
- *   - do cache writes asynchronously
- *   - defer writes until cache object lookup completion
- */
-struct fscache_storage {
-	struct fscache_operation op;
-	pgoff_t			store_limit;	/* don't write more than this */
-};
-
-/*
- * cache operations
- */
-struct fscache_cache_ops {
-	/* name of cache provider */
-	const char *name;
-
-	/* allocate an object record for a cookie */
-	struct fscache_object *(*alloc_object)(struct fscache_cache *cache,
-					       struct fscache_cookie *cookie);
-
-	/* look up the object for a cookie
-	 * - return -ETIMEDOUT to be requeued
-	 */
-	int (*lookup_object)(struct fscache_object *object);
-
-	/* finished looking up */
-	void (*lookup_complete)(struct fscache_object *object);
-
-	/* increment the usage count on this object (may fail if unmounting) */
-	struct fscache_object *(*grab_object)(struct fscache_object *object,
-					      enum fscache_obj_ref_trace why);
-
-	/* pin an object in the cache */
-	int (*pin_object)(struct fscache_object *object);
-
-	/* unpin an object in the cache */
-	void (*unpin_object)(struct fscache_object *object);
-
-	/* check the consistency between the backing cache and the FS-Cache
-	 * cookie */
-	int (*check_consistency)(struct fscache_operation *op);
-
-	/* store the updated auxiliary data on an object */
-	void (*update_object)(struct fscache_object *object);
-
-	/* Invalidate an object */
-	void (*invalidate_object)(struct fscache_operation *op);
-
-	/* discard the resources pinned by an object and effect retirement if
-	 * necessary */
-	void (*drop_object)(struct fscache_object *object);
-
-	/* dispose of a reference to an object */
-	void (*put_object)(struct fscache_object *object,
-			   enum fscache_obj_ref_trace why);
-
-	/* sync a cache */
-	void (*sync_cache)(struct fscache_cache *cache);
-
-	/* notification that the attributes of a non-index object (such as
-	 * i_size) have changed */
-	int (*attr_changed)(struct fscache_object *object);
-
-	/* reserve space for an object's data and associated metadata */
-	int (*reserve_space)(struct fscache_object *object, loff_t i_size);
-
-	/* request a backing block for a page be read or allocated in the
-	 * cache */
-	fscache_page_retrieval_func_t read_or_alloc_page;
-
-	/* request backing blocks for a list of pages be read or allocated in
-	 * the cache */
-	fscache_pages_retrieval_func_t read_or_alloc_pages;
-
-	/* request a backing block for a page be allocated in the cache so that
-	 * it can be written directly */
-	fscache_page_retrieval_func_t allocate_page;
-
-	/* request backing blocks for pages be allocated in the cache so that
-	 * they can be written directly */
-	fscache_pages_retrieval_func_t allocate_pages;
-
-	/* write a page to its backing block in the cache */
-	int (*write_page)(struct fscache_storage *op, struct page *page);
-
-	/* detach backing block from a page (optional)
-	 * - must release the cookie lock before returning
-	 * - may sleep
-	 */
-	void (*uncache_page)(struct fscache_object *object,
-			     struct page *page);
-
-	/* dissociate a cache from all the pages it was backing */
-	void (*dissociate_pages)(struct fscache_cache *cache);
-
-	/* Begin a read operation for the netfs lib */
-	int (*begin_read_operation)(struct netfs_read_request *rreq,
-				    struct fscache_retrieval *op);
-};
-
-extern struct fscache_cookie fscache_fsdef_index;
-
-/*
- * Event list for fscache_object::{event_mask,events}
- */
-enum {
-	FSCACHE_OBJECT_EV_NEW_CHILD,	/* T if object has a new child */
-	FSCACHE_OBJECT_EV_PARENT_READY,	/* T if object's parent is ready */
-	FSCACHE_OBJECT_EV_UPDATE,	/* T if object should be updated */
-	FSCACHE_OBJECT_EV_INVALIDATE,	/* T if cache requested object invalidation */
-	FSCACHE_OBJECT_EV_CLEARED,	/* T if accessors all gone */
-	FSCACHE_OBJECT_EV_ERROR,	/* T if fatal error occurred during processing */
-	FSCACHE_OBJECT_EV_KILL,		/* T if netfs relinquished or cache withdrew object */
-	NR_FSCACHE_OBJECT_EVENTS
-};
-
-#define FSCACHE_OBJECT_EVENTS_MASK ((1UL << NR_FSCACHE_OBJECT_EVENTS) - 1)
-
-/*
- * States for object state machine.
- */
-struct fscache_transition {
-	unsigned long events;
-	const struct fscache_state *transit_to;
-};
-
-struct fscache_state {
-	char name[24];
-	char short_name[8];
-	const struct fscache_state *(*work)(struct fscache_object *object,
-					    int event);
-	const struct fscache_transition transitions[];
-};
-
-/*
- * on-disk cache file or index handle
- */
-struct fscache_object {
-	const struct fscache_state *state;	/* Object state machine state */
-	const struct fscache_transition *oob_table; /* OOB state transition table */
-	int			debug_id;	/* debugging ID */
-	int			n_children;	/* number of child objects */
-	int			n_ops;		/* number of extant ops on object */
-	int			n_obj_ops;	/* number of object ops outstanding on object */
-	int			n_in_progress;	/* number of ops in progress */
-	int			n_exclusive;	/* number of exclusive ops queued or in progress */
-	atomic_t		n_reads;	/* number of read ops in progress */
-	spinlock_t		lock;		/* state and operations lock */
-
-	unsigned long		lookup_jif;	/* time at which lookup started */
-	unsigned long		oob_event_mask;	/* OOB events this object is interested in */
-	unsigned long		event_mask;	/* events this object is interested in */
-	unsigned long		events;		/* events to be processed by this object
-						 * (order is important - using fls) */
-
-	unsigned long		flags;
-#define FSCACHE_OBJECT_LOCK		0	/* T if object is busy being processed */
-#define FSCACHE_OBJECT_PENDING_WRITE	1	/* T if object has pending write */
-#define FSCACHE_OBJECT_WAITING		2	/* T if object is waiting on its parent */
-#define FSCACHE_OBJECT_IS_LIVE		3	/* T if object is not withdrawn or relinquished */
-#define FSCACHE_OBJECT_IS_LOOKED_UP	4	/* T if object has been looked up */
-#define FSCACHE_OBJECT_IS_AVAILABLE	5	/* T if object has become active */
-#define FSCACHE_OBJECT_RETIRED		6	/* T if object was retired on relinquishment */
-#define FSCACHE_OBJECT_KILLED_BY_CACHE	7	/* T if object was killed by the cache */
-#define FSCACHE_OBJECT_RUN_AFTER_DEAD	8	/* T if object has been dispatched after death */
-
-	struct list_head	cache_link;	/* link in cache->object_list */
-	struct hlist_node	cookie_link;	/* link in cookie->backing_objects */
-	struct fscache_cache	*cache;		/* cache that supplied this object */
-	struct fscache_cookie	*cookie;	/* netfs's file/index object */
-	struct fscache_object	*parent;	/* parent object */
-	struct work_struct	work;		/* attention scheduling record */
-	struct list_head	dependents;	/* FIFO of dependent objects */
-	struct list_head	dep_link;	/* link in parent's dependents list */
-	struct list_head	pending_ops;	/* unstarted operations on this object */
-	pgoff_t			store_limit;	/* current storage limit */
-	loff_t			store_limit_l;	/* current storage limit */
-};
-
-extern void fscache_object_init(struct fscache_object *, struct fscache_cookie *,
-				struct fscache_cache *);
-extern void fscache_object_destroy(struct fscache_object *);
-
-extern void fscache_object_lookup_negative(struct fscache_object *object);
-extern void fscache_obtained_object(struct fscache_object *object);
-
-static inline bool fscache_object_is_live(struct fscache_object *object)
-{
-	return test_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags);
-}
-
-static inline bool fscache_object_is_dying(struct fscache_object *object)
-{
-	return !fscache_object_is_live(object);
-}
-
-static inline bool fscache_object_is_available(struct fscache_object *object)
-{
-	return test_bit(FSCACHE_OBJECT_IS_AVAILABLE, &object->flags);
-}
-
-static inline bool fscache_cache_is_broken(struct fscache_object *object)
-{
-	return test_bit(FSCACHE_IOERROR, &object->cache->flags);
-}
-
-static inline bool fscache_object_is_active(struct fscache_object *object)
-{
-	return fscache_object_is_available(object) &&
-		fscache_object_is_live(object) &&
-		!fscache_cache_is_broken(object);
-}
-
-/**
- * fscache_object_destroyed - Note destruction of an object in a cache
- * @cache: The cache from which the object came
- *
- * Note the destruction and deallocation of an object record in a cache.
- */
-static inline void fscache_object_destroyed(struct fscache_cache *cache)
-{
-	if (atomic_dec_and_test(&cache->object_count))
-		wake_up_all(&fscache_cache_cleared_wq);
-}
-
-/**
- * fscache_object_lookup_error - Note an object encountered an error
- * @object: The object on which the error was encountered
- *
- * Note that an object encountered a fatal error (usually an I/O error) and
- * that it should be withdrawn as soon as possible.
- */
-static inline void fscache_object_lookup_error(struct fscache_object *object)
-{
-	set_bit(FSCACHE_OBJECT_EV_ERROR, &object->events);
-}
-
-/**
- * fscache_set_store_limit - Set the maximum size to be stored in an object
- * @object: The object to set the maximum on
- * @i_size: The limit to set in bytes
- *
- * Set the maximum size an object is permitted to reach, implying the highest
- * byte that may be written.  Intended to be called by the attr_changed() op.
- *
- * See Documentation/filesystems/caching/backend-api.rst for a complete
- * description.
- */
-static inline
-void fscache_set_store_limit(struct fscache_object *object, loff_t i_size)
-{
-	object->store_limit_l = i_size;
-	object->store_limit = i_size >> PAGE_SHIFT;
-	if (i_size & ~PAGE_MASK)
-		object->store_limit++;
-}
-
-/**
- * fscache_end_io - End a retrieval operation on a page
- * @op: The FS-Cache operation covering the retrieval
- * @page: The page that was to be fetched
- * @error: The error code (0 if successful)
- *
- * Note the end of an operation to retrieve a page, as covered by a particular
- * operation record.
- */
-static inline void fscache_end_io(struct fscache_retrieval *op,
-				  struct page *page, int error)
-{
-	op->end_io_func(page, op->context, error);
-}
-
-static inline void __fscache_use_cookie(struct fscache_cookie *cookie)
-{
-	atomic_inc(&cookie->n_active);
-}
-
-/**
- * fscache_use_cookie - Request usage of cookie attached to an object
- * @object: Object description
- * 
- * Request usage of the cookie attached to an object.  NULL is returned if the
- * relinquishment had reduced the cookie usage count to 0.
- */
-static inline bool fscache_use_cookie(struct fscache_object *object)
-{
-	struct fscache_cookie *cookie = object->cookie;
-	return atomic_inc_not_zero(&cookie->n_active) != 0;
-}
-
-static inline bool __fscache_unuse_cookie(struct fscache_cookie *cookie)
-{
-	return atomic_dec_and_test(&cookie->n_active);
-}
-
-static inline void __fscache_wake_unused_cookie(struct fscache_cookie *cookie)
-{
-	wake_up_var(&cookie->n_active);
-}
-
-/**
- * fscache_unuse_cookie - Cease usage of cookie attached to an object
- * @object: Object description
- * 
- * Cease usage of the cookie attached to an object.  When the users count
- * reaches zero then the cookie relinquishment will be permitted to proceed.
- */
-static inline void fscache_unuse_cookie(struct fscache_object *object)
-{
-	struct fscache_cookie *cookie = object->cookie;
-	if (__fscache_unuse_cookie(cookie))
-		__fscache_wake_unused_cookie(cookie);
-}
-
-/*
- * out-of-line cache backend functions
- */
-extern __printf(3, 4)
-void fscache_init_cache(struct fscache_cache *cache,
-			const struct fscache_cache_ops *ops,
-			const char *idfmt, ...);
-
-extern int fscache_add_cache(struct fscache_cache *cache,
-			     struct fscache_object *fsdef,
-			     const char *tagname);
-extern void fscache_withdraw_cache(struct fscache_cache *cache);
-
-extern void fscache_io_error(struct fscache_cache *cache);
-
-extern void fscache_mark_page_cached(struct fscache_retrieval *op,
-				     struct page *page);
-
-extern void fscache_mark_pages_cached(struct fscache_retrieval *op,
-				      struct pagevec *pagevec);
-
-extern bool fscache_object_sleep_till_congested(signed long *timeoutp);
-
-extern enum fscache_checkaux fscache_check_aux(struct fscache_object *object,
-					       const void *data,
-					       uint16_t datalen,
-					       loff_t object_size);
-
-extern void fscache_object_retrying_stale(struct fscache_object *object);
-
-enum fscache_why_object_killed {
-	FSCACHE_OBJECT_IS_STALE,
-	FSCACHE_OBJECT_NO_SPACE,
-	FSCACHE_OBJECT_WAS_RETIRED,
-	FSCACHE_OBJECT_WAS_CULLED,
-};
-extern void fscache_object_mark_killed(struct fscache_object *object,
-				       enum fscache_why_object_killed why);
 
 #endif /* _LINUX_FSCACHE_CACHE_H */
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 3b2282c157f7..0364a4ca16f6 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -15,861 +15,14 @@
 #define _LINUX_FSCACHE_H
 
 #include <linux/fs.h>
-#include <linux/list.h>
-#include <linux/pagemap.h>
-#include <linux/pagevec.h>
-#include <linux/list_bl.h>
 #include <linux/netfs.h>
 
 #if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE)
-#define fscache_available() (1)
 #define fscache_cookie_valid(cookie) (cookie)
+#define fscache_cookie_enabled(cookie) (cookie)
 #else
-#define fscache_available() (0)
 #define fscache_cookie_valid(cookie) (0)
+#define fscache_cookie_enabled(cookie) (0)
 #endif
 
-
-/* pattern used to fill dead space in an index entry */
-#define FSCACHE_INDEX_DEADFILL_PATTERN 0x79
-
-struct pagevec;
-struct fscache_cache_tag;
-struct fscache_cookie;
-struct fscache_netfs;
-struct netfs_read_request;
-
-typedef void (*fscache_rw_complete_t)(struct page *page,
-				      void *context,
-				      int error);
-
-/* result of index entry consultation */
-enum fscache_checkaux {
-	FSCACHE_CHECKAUX_OKAY,		/* entry okay as is */
-	FSCACHE_CHECKAUX_NEEDS_UPDATE,	/* entry requires update */
-	FSCACHE_CHECKAUX_OBSOLETE,	/* entry requires deletion */
-};
-
-/*
- * fscache cookie definition
- */
-struct fscache_cookie_def {
-	/* name of cookie type */
-	char name[16];
-
-	/* cookie type */
-	uint8_t type;
-#define FSCACHE_COOKIE_TYPE_INDEX	0
-#define FSCACHE_COOKIE_TYPE_DATAFILE	1
-
-	/* select the cache into which to insert an entry in this index
-	 * - optional
-	 * - should return a cache identifier or NULL to cause the cache to be
-	 *   inherited from the parent if possible or the first cache picked
-	 *   for a non-index file if not
-	 */
-	struct fscache_cache_tag *(*select_cache)(
-		const void *parent_netfs_data,
-		const void *cookie_netfs_data);
-
-	/* consult the netfs about the state of an object
-	 * - this function can be absent if the index carries no state data
-	 * - the netfs data from the cookie being used as the target is
-	 *   presented, as is the auxiliary data and the object size
-	 */
-	enum fscache_checkaux (*check_aux)(void *cookie_netfs_data,
-					   const void *data,
-					   uint16_t datalen,
-					   loff_t object_size);
-
-	/* get an extra reference on a read context
-	 * - this function can be absent if the completion function doesn't
-	 *   require a context
-	 */
-	void (*get_context)(void *cookie_netfs_data, void *context);
-
-	/* release an extra reference on a read context
-	 * - this function can be absent if the completion function doesn't
-	 *   require a context
-	 */
-	void (*put_context)(void *cookie_netfs_data, void *context);
-
-	/* indicate page that now have cache metadata retained
-	 * - this function should mark the specified page as now being cached
-	 * - the page will have been marked with PG_fscache before this is
-	 *   called, so this is optional
-	 */
-	void (*mark_page_cached)(void *cookie_netfs_data,
-				 struct address_space *mapping,
-				 struct page *page);
-};
-
-/*
- * fscache cached network filesystem type
- * - name, version and ops must be filled in before registration
- * - all other fields will be set during registration
- */
-struct fscache_netfs {
-	uint32_t			version;	/* indexing version */
-	const char			*name;		/* filesystem name */
-	struct fscache_cookie		*primary_index;
-};
-
-/*
- * data file or index object cookie
- * - a file will only appear in one cache
- * - a request to cache a file may or may not be honoured, subject to
- *   constraints such as disk space
- * - indices are created on disk just-in-time
- */
-struct fscache_cookie {
-	refcount_t			ref;		/* number of users of this cookie */
-	atomic_t			n_children;	/* number of children of this cookie */
-	atomic_t			n_active;	/* number of active users of netfs ptrs */
-	unsigned int			debug_id;
-	spinlock_t			lock;
-	spinlock_t			stores_lock;	/* lock on page store tree */
-	struct hlist_head		backing_objects; /* object(s) backing this file/index */
-	const struct fscache_cookie_def	*def;		/* definition */
-	struct fscache_cookie		*parent;	/* parent of this entry */
-	struct hlist_bl_node		hash_link;	/* Link in hash table */
-	struct list_head		proc_link;	/* Link in proc list */
-	void				*netfs_data;	/* back pointer to netfs */
-	struct radix_tree_root		stores;		/* pages to be stored on this cookie */
-#define FSCACHE_COOKIE_PENDING_TAG	0		/* pages tag: pending write to cache */
-#define FSCACHE_COOKIE_STORING_TAG	1		/* pages tag: writing to cache */
-
-	unsigned long			flags;
-#define FSCACHE_COOKIE_LOOKING_UP	0	/* T if non-index cookie being looked up still */
-#define FSCACHE_COOKIE_NO_DATA_YET	1	/* T if new object with no cached data yet */
-#define FSCACHE_COOKIE_UNAVAILABLE	2	/* T if cookie is unavailable (error, etc) */
-#define FSCACHE_COOKIE_INVALIDATING	3	/* T if cookie is being invalidated */
-#define FSCACHE_COOKIE_RELINQUISHED	4	/* T if cookie has been relinquished */
-#define FSCACHE_COOKIE_ENABLED		5	/* T if cookie is enabled */
-#define FSCACHE_COOKIE_ENABLEMENT_LOCK	6	/* T if cookie is being en/disabled */
-#define FSCACHE_COOKIE_AUX_UPDATED	8	/* T if the auxiliary data was updated */
-#define FSCACHE_COOKIE_ACQUIRED		9	/* T if cookie is in use */
-#define FSCACHE_COOKIE_RELINQUISHING	10	/* T if cookie is being relinquished */
-
-	u8				type;		/* Type of object */
-	u8				key_len;	/* Length of index key */
-	u8				aux_len;	/* Length of auxiliary data */
-	u32				key_hash;	/* Hash of parent, type, key, len */
-	union {
-		void			*key;		/* Index key */
-		u8			inline_key[16];	/* - If the key is short enough */
-	};
-	union {
-		void			*aux;		/* Auxiliary data */
-		u8			inline_aux[8];	/* - If the aux data is short enough */
-	};
-};
-
-static inline bool fscache_cookie_enabled(struct fscache_cookie *cookie)
-{
-	return fscache_cookie_valid(cookie) && test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
-}
-
-/*
- * slow-path functions for when there is actually caching available, and the
- * netfs does actually have a valid token
- * - these are not to be called directly
- * - these are undefined symbols when FS-Cache is not configured and the
- *   optimiser takes care of not using them
- */
-extern int __fscache_register_netfs(struct fscache_netfs *);
-extern void __fscache_unregister_netfs(struct fscache_netfs *);
-extern struct fscache_cache_tag *__fscache_lookup_cache_tag(const char *);
-extern void __fscache_release_cache_tag(struct fscache_cache_tag *);
-
-extern struct fscache_cookie *__fscache_acquire_cookie(
-	struct fscache_cookie *,
-	const struct fscache_cookie_def *,
-	const void *, size_t,
-	const void *, size_t,
-	void *, loff_t, bool);
-extern void __fscache_relinquish_cookie(struct fscache_cookie *, const void *, bool);
-extern int __fscache_check_consistency(struct fscache_cookie *, const void *);
-extern void __fscache_update_cookie(struct fscache_cookie *, const void *);
-extern int __fscache_attr_changed(struct fscache_cookie *);
-extern void __fscache_invalidate(struct fscache_cookie *);
-extern void __fscache_wait_on_invalidate(struct fscache_cookie *);
-
-#ifdef FSCACHE_USE_NEW_IO_API
-extern int __fscache_begin_read_operation(struct netfs_read_request *, struct fscache_cookie *);
-#else
-extern int __fscache_read_or_alloc_page(struct fscache_cookie *,
-					struct page *,
-					fscache_rw_complete_t,
-					void *,
-					gfp_t);
-extern int __fscache_read_or_alloc_pages(struct fscache_cookie *,
-					 struct address_space *,
-					 struct list_head *,
-					 unsigned *,
-					 fscache_rw_complete_t,
-					 void *,
-					 gfp_t);
-extern int __fscache_alloc_page(struct fscache_cookie *, struct page *, gfp_t);
-extern int __fscache_write_page(struct fscache_cookie *, struct page *, loff_t, gfp_t);
-extern void __fscache_uncache_page(struct fscache_cookie *, struct page *);
-extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *);
-extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *);
-extern bool __fscache_maybe_release_page(struct fscache_cookie *, struct page *,
-					 gfp_t);
-extern void __fscache_uncache_all_inode_pages(struct fscache_cookie *,
-					      struct inode *);
-extern void __fscache_readpages_cancel(struct fscache_cookie *cookie,
-				       struct list_head *pages);
-#endif /* FSCACHE_USE_NEW_IO_API */
-
-extern void __fscache_disable_cookie(struct fscache_cookie *, const void *, bool);
-extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, loff_t,
-				    bool (*)(void *), void *);
-
-/**
- * fscache_register_netfs - Register a filesystem as desiring caching services
- * @netfs: The description of the filesystem
- *
- * Register a filesystem as desiring caching services if they're available.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-int fscache_register_netfs(struct fscache_netfs *netfs)
-{
-	if (fscache_available())
-		return __fscache_register_netfs(netfs);
-	else
-		return 0;
-}
-
-/**
- * fscache_unregister_netfs - Indicate that a filesystem no longer desires
- * caching services
- * @netfs: The description of the filesystem
- *
- * Indicate that a filesystem no longer desires caching services for the
- * moment.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-void fscache_unregister_netfs(struct fscache_netfs *netfs)
-{
-	if (fscache_available())
-		__fscache_unregister_netfs(netfs);
-}
-
-/**
- * fscache_lookup_cache_tag - Look up a cache tag
- * @name: The name of the tag to search for
- *
- * Acquire a specific cache referral tag that can be used to select a specific
- * cache in which to cache an index.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name)
-{
-	if (fscache_available())
-		return __fscache_lookup_cache_tag(name);
-	else
-		return NULL;
-}
-
-/**
- * fscache_release_cache_tag - Release a cache tag
- * @tag: The tag to release
- *
- * Release a reference to a cache referral tag previously looked up.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-void fscache_release_cache_tag(struct fscache_cache_tag *tag)
-{
-	if (fscache_available())
-		__fscache_release_cache_tag(tag);
-}
-
-/**
- * fscache_acquire_cookie - Acquire a cookie to represent a cache object
- * @parent: The cookie that's to be the parent of this one
- * @def: A description of the cache object, including callback operations
- * @index_key: The index key for this cookie
- * @index_key_len: Size of the index key
- * @aux_data: The auxiliary data for the cookie (may be NULL)
- * @aux_data_len: Size of the auxiliary data buffer
- * @netfs_data: An arbitrary piece of data to be kept in the cookie to
- * represent the cache object to the netfs
- * @object_size: The initial size of object
- * @enable: Whether or not to enable a data cookie immediately
- *
- * This function is used to inform FS-Cache about part of an index hierarchy
- * that can be used to locate files.  This is done by requesting a cookie for
- * each index in the path to the file.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-struct fscache_cookie *fscache_acquire_cookie(
-	struct fscache_cookie *parent,
-	const struct fscache_cookie_def *def,
-	const void *index_key,
-	size_t index_key_len,
-	const void *aux_data,
-	size_t aux_data_len,
-	void *netfs_data,
-	loff_t object_size,
-	bool enable)
-{
-	if (fscache_cookie_valid(parent) && fscache_cookie_enabled(parent))
-		return __fscache_acquire_cookie(parent, def,
-						index_key, index_key_len,
-						aux_data, aux_data_len,
-						netfs_data, object_size, enable);
-	else
-		return NULL;
-}
-
-/**
- * fscache_relinquish_cookie - Return the cookie to the cache, maybe discarding
- * it
- * @cookie: The cookie being returned
- * @aux_data: The updated auxiliary data for the cookie (may be NULL)
- * @retire: True if the cache object the cookie represents is to be discarded
- *
- * This function returns a cookie to the cache, forcibly discarding the
- * associated cache object if retire is set to true.  The opportunity is
- * provided to update the auxiliary data in the cache before the object is
- * disconnected.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-void fscache_relinquish_cookie(struct fscache_cookie *cookie,
-			       const void *aux_data,
-			       bool retire)
-{
-	if (fscache_cookie_valid(cookie))
-		__fscache_relinquish_cookie(cookie, aux_data, retire);
-}
-
-/**
- * fscache_check_consistency - Request validation of a cache's auxiliary data
- * @cookie: The cookie representing the cache object
- * @aux_data: The updated auxiliary data for the cookie (may be NULL)
- *
- * Request an consistency check from fscache, which passes the request to the
- * backing cache.  The auxiliary data on the cookie will be updated first if
- * @aux_data is set.
- *
- * Returns 0 if consistent and -ESTALE if inconsistent.  May also
- * return -ENOMEM and -ERESTARTSYS.
- */
-static inline
-int fscache_check_consistency(struct fscache_cookie *cookie,
-			      const void *aux_data)
-{
-	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
-		return __fscache_check_consistency(cookie, aux_data);
-	else
-		return 0;
-}
-
-/**
- * fscache_update_cookie - Request that a cache object be updated
- * @cookie: The cookie representing the cache object
- * @aux_data: The updated auxiliary data for the cookie (may be NULL)
- *
- * Request an update of the index data for the cache object associated with the
- * cookie.  The auxiliary data on the cookie will be updated first if @aux_data
- * is set.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-void fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data)
-{
-	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
-		__fscache_update_cookie(cookie, aux_data);
-}
-
-/**
- * fscache_pin_cookie - Pin a data-storage cache object in its cache
- * @cookie: The cookie representing the cache object
- *
- * Permit data-storage cache objects to be pinned in the cache.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-int fscache_pin_cookie(struct fscache_cookie *cookie)
-{
-	return -ENOBUFS;
-}
-
-/**
- * fscache_pin_cookie - Unpin a data-storage cache object in its cache
- * @cookie: The cookie representing the cache object
- *
- * Permit data-storage cache objects to be unpinned from the cache.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-void fscache_unpin_cookie(struct fscache_cookie *cookie)
-{
-}
-
-/**
- * fscache_attr_changed - Notify cache that an object's attributes changed
- * @cookie: The cookie representing the cache object
- *
- * Send a notification to the cache indicating that an object's attributes have
- * changed.  This includes the data size.  These attributes will be obtained
- * through the get_attr() cookie definition op.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-int fscache_attr_changed(struct fscache_cookie *cookie)
-{
-	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
-		return __fscache_attr_changed(cookie);
-	else
-		return -ENOBUFS;
-}
-
-/**
- * fscache_invalidate - Notify cache that an object needs invalidation
- * @cookie: The cookie representing the cache object
- *
- * Notify the cache that an object is needs to be invalidated and that it
- * should abort any retrievals or stores it is doing on the cache.  The object
- * is then marked non-caching until such time as the invalidation is complete.
- *
- * This can be called with spinlocks held.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-void fscache_invalidate(struct fscache_cookie *cookie)
-{
-	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
-		__fscache_invalidate(cookie);
-}
-
-/**
- * fscache_wait_on_invalidate - Wait for invalidation to complete
- * @cookie: The cookie representing the cache object
- *
- * Wait for the invalidation of an object to complete.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-void fscache_wait_on_invalidate(struct fscache_cookie *cookie)
-{
-	if (fscache_cookie_valid(cookie))
-		__fscache_wait_on_invalidate(cookie);
-}
-
-/**
- * fscache_reserve_space - Reserve data space for a cached object
- * @cookie: The cookie representing the cache object
- * @i_size: The amount of space to be reserved
- *
- * Reserve an amount of space in the cache for the cache object attached to a
- * cookie so that a write to that object within the space can always be
- * honoured.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size)
-{
-	return -ENOBUFS;
-}
-
-#ifdef FSCACHE_USE_NEW_IO_API
-
-/**
- * fscache_begin_read_operation - Begin a read operation for the netfs lib
- * @rreq: The read request being undertaken
- * @cookie: The cookie representing the cache object
- *
- * Begin a read operation on behalf of the netfs helper library.  @rreq
- * indicates the read request to which the operation state should be attached;
- * @cookie indicates the cache object that will be accessed.
- *
- * This is intended to be called from the ->begin_cache_operation() netfs lib
- * operation as implemented by the network filesystem.
- *
- * Returns:
- * * 0		- Success
- * * -ENOBUFS	- No caching available
- * * Other error code from the cache, such as -ENOMEM.
- */
-static inline
-int fscache_begin_read_operation(struct netfs_read_request *rreq,
-				 struct fscache_cookie *cookie)
-{
-	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
-		return __fscache_begin_read_operation(rreq, cookie);
-	return -ENOBUFS;
-}
-
-#else /* FSCACHE_USE_NEW_IO_API */
-
-/**
- * fscache_read_or_alloc_page - Read a page from the cache or allocate a block
- * in which to store it
- * @cookie: The cookie representing the cache object
- * @page: The netfs page to fill if possible
- * @end_io_func: The callback to invoke when and if the page is filled
- * @context: An arbitrary piece of data to pass on to end_io_func()
- * @gfp: The conditions under which memory allocation should be made
- *
- * Read a page from the cache, or if that's not possible make a potential
- * one-block reservation in the cache into which the page may be stored once
- * fetched from the server.
- *
- * If the page is not backed by the cache object, or if it there's some reason
- * it can't be, -ENOBUFS will be returned and nothing more will be done for
- * that page.
- *
- * Else, if that page is backed by the cache, a read will be initiated directly
- * to the netfs's page and 0 will be returned by this function.  The
- * end_io_func() callback will be invoked when the operation terminates on a
- * completion or failure.  Note that the callback may be invoked before the
- * return.
- *
- * Else, if the page is unbacked, -ENODATA is returned and a block may have
- * been allocated in the cache.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-int fscache_read_or_alloc_page(struct fscache_cookie *cookie,
-			       struct page *page,
-			       fscache_rw_complete_t end_io_func,
-			       void *context,
-			       gfp_t gfp)
-{
-	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
-		return __fscache_read_or_alloc_page(cookie, page, end_io_func,
-						    context, gfp);
-	else
-		return -ENOBUFS;
-}
-
-/**
- * fscache_read_or_alloc_pages - Read pages from the cache and/or allocate
- * blocks in which to store them
- * @cookie: The cookie representing the cache object
- * @mapping: The netfs inode mapping to which the pages will be attached
- * @pages: A list of potential netfs pages to be filled
- * @nr_pages: Number of pages to be read and/or allocated
- * @end_io_func: The callback to invoke when and if each page is filled
- * @context: An arbitrary piece of data to pass on to end_io_func()
- * @gfp: The conditions under which memory allocation should be made
- *
- * Read a set of pages from the cache, or if that's not possible, attempt to
- * make a potential one-block reservation for each page in the cache into which
- * that page may be stored once fetched from the server.
- *
- * If some pages are not backed by the cache object, or if it there's some
- * reason they can't be, -ENOBUFS will be returned and nothing more will be
- * done for that pages.
- *
- * Else, if some of the pages are backed by the cache, a read will be initiated
- * directly to the netfs's page and 0 will be returned by this function.  The
- * end_io_func() callback will be invoked when the operation terminates on a
- * completion or failure.  Note that the callback may be invoked before the
- * return.
- *
- * Else, if a page is unbacked, -ENODATA is returned and a block may have
- * been allocated in the cache.
- *
- * Because the function may want to return all of -ENOBUFS, -ENODATA and 0 in
- * regard to different pages, the return values are prioritised in that order.
- * Any pages submitted for reading are removed from the pages list.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-int fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
-				struct address_space *mapping,
-				struct list_head *pages,
-				unsigned *nr_pages,
-				fscache_rw_complete_t end_io_func,
-				void *context,
-				gfp_t gfp)
-{
-	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
-		return __fscache_read_or_alloc_pages(cookie, mapping, pages,
-						     nr_pages, end_io_func,
-						     context, gfp);
-	else
-		return -ENOBUFS;
-}
-
-/**
- * fscache_alloc_page - Allocate a block in which to store a page
- * @cookie: The cookie representing the cache object
- * @page: The netfs page to allocate a page for
- * @gfp: The conditions under which memory allocation should be made
- *
- * Request Allocation a block in the cache in which to store a netfs page
- * without retrieving any contents from the cache.
- *
- * If the page is not backed by a file then -ENOBUFS will be returned and
- * nothing more will be done, and no reservation will be made.
- *
- * Else, a block will be allocated if one wasn't already, and 0 will be
- * returned
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-int fscache_alloc_page(struct fscache_cookie *cookie,
-		       struct page *page,
-		       gfp_t gfp)
-{
-	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
-		return __fscache_alloc_page(cookie, page, gfp);
-	else
-		return -ENOBUFS;
-}
-
-/**
- * fscache_readpages_cancel - Cancel read/alloc on pages
- * @cookie: The cookie representing the inode's cache object.
- * @pages: The netfs pages that we canceled write on in readpages()
- *
- * Uncache/unreserve the pages reserved earlier in readpages() via
- * fscache_readpages_or_alloc() and similar.  In most successful caches in
- * readpages() this doesn't do anything.  In cases when the underlying netfs's
- * readahead failed we need to clean up the pagelist (unmark and uncache).
- *
- * This function may sleep as it may have to clean up disk state.
- */
-static inline
-void fscache_readpages_cancel(struct fscache_cookie *cookie,
-			      struct list_head *pages)
-{
-	if (fscache_cookie_valid(cookie))
-		__fscache_readpages_cancel(cookie, pages);
-}
-
-/**
- * fscache_write_page - Request storage of a page in the cache
- * @cookie: The cookie representing the cache object
- * @page: The netfs page to store
- * @object_size: Updated size of object
- * @gfp: The conditions under which memory allocation should be made
- *
- * Request the contents of the netfs page be written into the cache.  This
- * request may be ignored if no cache block is currently allocated, in which
- * case it will return -ENOBUFS.
- *
- * If a cache block was already allocated, a write will be initiated and 0 will
- * be returned.  The PG_fscache_write page bit is set immediately and will then
- * be cleared at the completion of the write to indicate the success or failure
- * of the operation.  Note that the completion may happen before the return.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-int fscache_write_page(struct fscache_cookie *cookie,
-		       struct page *page,
-		       loff_t object_size,
-		       gfp_t gfp)
-{
-	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
-		return __fscache_write_page(cookie, page, object_size, gfp);
-	else
-		return -ENOBUFS;
-}
-
-/**
- * fscache_uncache_page - Indicate that caching is no longer required on a page
- * @cookie: The cookie representing the cache object
- * @page: The netfs page that was being cached.
- *
- * Tell the cache that we no longer want a page to be cached and that it should
- * remove any knowledge of the netfs page it may have.
- *
- * Note that this cannot cancel any outstanding I/O operations between this
- * page and the cache.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-void fscache_uncache_page(struct fscache_cookie *cookie,
-			  struct page *page)
-{
-	if (fscache_cookie_valid(cookie))
-		__fscache_uncache_page(cookie, page);
-}
-
-/**
- * fscache_check_page_write - Ask if a page is being writing to the cache
- * @cookie: The cookie representing the cache object
- * @page: The netfs page that is being cached.
- *
- * Ask the cache if a page is being written to the cache.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-bool fscache_check_page_write(struct fscache_cookie *cookie,
-			      struct page *page)
-{
-	if (fscache_cookie_valid(cookie))
-		return __fscache_check_page_write(cookie, page);
-	return false;
-}
-
-/**
- * fscache_wait_on_page_write - Wait for a page to complete writing to the cache
- * @cookie: The cookie representing the cache object
- * @page: The netfs page that is being cached.
- *
- * Ask the cache to wake us up when a page is no longer being written to the
- * cache.
- *
- * See Documentation/filesystems/caching/netfs-api.rst for a complete
- * description.
- */
-static inline
-void fscache_wait_on_page_write(struct fscache_cookie *cookie,
-				struct page *page)
-{
-	if (fscache_cookie_valid(cookie))
-		__fscache_wait_on_page_write(cookie, page);
-}
-
-/**
- * fscache_maybe_release_page - Consider releasing a page, cancelling a store
- * @cookie: The cookie representing the cache object
- * @page: The netfs page that is being cached.
- * @gfp: The gfp flags passed to releasepage()
- *
- * Consider releasing a page for the vmscan algorithm, on behalf of the netfs's
- * releasepage() call.  A storage request on the page may cancelled if it is
- * not currently being processed.
- *
- * The function returns true if the page no longer has a storage request on it,
- * and false if a storage request is left in place.  If true is returned, the
- * page will have been passed to fscache_uncache_page().  If false is returned
- * the page cannot be freed yet.
- */
-static inline
-bool fscache_maybe_release_page(struct fscache_cookie *cookie,
-				struct page *page,
-				gfp_t gfp)
-{
-	if (fscache_cookie_valid(cookie) && PageFsCache(page))
-		return __fscache_maybe_release_page(cookie, page, gfp);
-	return true;
-}
-
-/**
- * fscache_uncache_all_inode_pages - Uncache all an inode's pages
- * @cookie: The cookie representing the inode's cache object.
- * @inode: The inode to uncache pages from.
- *
- * Uncache all the pages in an inode that are marked PG_fscache, assuming them
- * to be associated with the given cookie.
- *
- * This function may sleep.  It will wait for pages that are being written out
- * and will wait whilst the PG_fscache mark is removed by the cache.
- */
-static inline
-void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
-				     struct inode *inode)
-{
-	if (fscache_cookie_valid(cookie))
-		__fscache_uncache_all_inode_pages(cookie, inode);
-}
-
-#endif /* FSCACHE_USE_NEW_IO_API */
-
-/**
- * fscache_disable_cookie - Disable a cookie
- * @cookie: The cookie representing the cache object
- * @aux_data: The updated auxiliary data for the cookie (may be NULL)
- * @invalidate: Invalidate the backing object
- *
- * Disable a cookie from accepting further alloc, read, write, invalidate,
- * update or acquire operations.  Outstanding operations can still be waited
- * upon and pages can still be uncached and the cookie relinquished.
- *
- * This will not return until all outstanding operations have completed.
- *
- * If @invalidate is set, then the backing object will be invalidated and
- * detached, otherwise it will just be detached.
- *
- * If @aux_data is set, then auxiliary data will be updated from that.
- */
-static inline
-void fscache_disable_cookie(struct fscache_cookie *cookie,
-			    const void *aux_data,
-			    bool invalidate)
-{
-	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
-		__fscache_disable_cookie(cookie, aux_data, invalidate);
-}
-
-/**
- * fscache_enable_cookie - Reenable a cookie
- * @cookie: The cookie representing the cache object
- * @aux_data: The updated auxiliary data for the cookie (may be NULL)
- * @object_size: Current size of object
- * @can_enable: A function to permit enablement once lock is held
- * @data: Data for can_enable()
- *
- * Reenable a previously disabled cookie, allowing it to accept further alloc,
- * read, write, invalidate, update or acquire operations.  An attempt will be
- * made to immediately reattach the cookie to a backing object.  If @aux_data
- * is set, the auxiliary data attached to the cookie will be updated.
- *
- * The can_enable() function is called (if not NULL) once the enablement lock
- * is held to rule on whether enablement is still permitted to go ahead.
- */
-static inline
-void fscache_enable_cookie(struct fscache_cookie *cookie,
-			   const void *aux_data,
-			   loff_t object_size,
-			   bool (*can_enable)(void *data),
-			   void *data)
-{
-	if (fscache_cookie_valid(cookie) && !fscache_cookie_enabled(cookie))
-		__fscache_enable_cookie(cookie, aux_data, object_size,
-					can_enable, data);
-}
-
 #endif /* _LINUX_FSCACHE_H */
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
deleted file mode 100644
index 446392f5ba83..000000000000
--- a/include/trace/events/fscache.h
+++ /dev/null
@@ -1,523 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* FS-Cache tracepoints
- *
- * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM fscache
-
-#if !defined(_TRACE_FSCACHE_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_FSCACHE_H
-
-#include <linux/fscache.h>
-#include <linux/tracepoint.h>
-
-/*
- * Define enums for tracing information.
- */
-#ifndef __FSCACHE_DECLARE_TRACE_ENUMS_ONCE_ONLY
-#define __FSCACHE_DECLARE_TRACE_ENUMS_ONCE_ONLY
-
-enum fscache_cookie_trace {
-	fscache_cookie_collision,
-	fscache_cookie_discard,
-	fscache_cookie_get_acquire_parent,
-	fscache_cookie_get_attach_object,
-	fscache_cookie_get_reacquire,
-	fscache_cookie_get_register_netfs,
-	fscache_cookie_put_acquire_nobufs,
-	fscache_cookie_put_dup_netfs,
-	fscache_cookie_put_relinquish,
-	fscache_cookie_put_object,
-	fscache_cookie_put_parent,
-};
-
-enum fscache_page_trace {
-	fscache_page_cached,
-	fscache_page_inval,
-	fscache_page_maybe_release,
-	fscache_page_radix_clear_store,
-	fscache_page_radix_delete,
-	fscache_page_radix_insert,
-	fscache_page_radix_pend2store,
-	fscache_page_radix_set_pend,
-	fscache_page_uncache,
-	fscache_page_write,
-	fscache_page_write_end,
-	fscache_page_write_end_pend,
-	fscache_page_write_end_noc,
-	fscache_page_write_wait,
-	fscache_page_trace__nr
-};
-
-enum fscache_op_trace {
-	fscache_op_cancel,
-	fscache_op_cancel_all,
-	fscache_op_cancelled,
-	fscache_op_completed,
-	fscache_op_enqueue_async,
-	fscache_op_enqueue_mythread,
-	fscache_op_gc,
-	fscache_op_init,
-	fscache_op_put,
-	fscache_op_run,
-	fscache_op_signal,
-	fscache_op_submit,
-	fscache_op_submit_ex,
-	fscache_op_work,
-	fscache_op_trace__nr
-};
-
-enum fscache_page_op_trace {
-	fscache_page_op_alloc_one,
-	fscache_page_op_attr_changed,
-	fscache_page_op_check_consistency,
-	fscache_page_op_invalidate,
-	fscache_page_op_retr_multi,
-	fscache_page_op_retr_one,
-	fscache_page_op_write_one,
-	fscache_page_op_trace__nr
-};
-
-#endif
-
-/*
- * Declare tracing information enums and their string mappings for display.
- */
-#define fscache_cookie_traces						\
-	EM(fscache_cookie_collision,		"*COLLISION*")		\
-	EM(fscache_cookie_discard,		"DISCARD")		\
-	EM(fscache_cookie_get_acquire_parent,	"GET prn")		\
-	EM(fscache_cookie_get_attach_object,	"GET obj")		\
-	EM(fscache_cookie_get_reacquire,	"GET raq")		\
-	EM(fscache_cookie_get_register_netfs,	"GET net")		\
-	EM(fscache_cookie_put_acquire_nobufs,	"PUT nbf")		\
-	EM(fscache_cookie_put_dup_netfs,	"PUT dnt")		\
-	EM(fscache_cookie_put_relinquish,	"PUT rlq")		\
-	EM(fscache_cookie_put_object,		"PUT obj")		\
-	E_(fscache_cookie_put_parent,		"PUT prn")
-
-#define fscache_page_traces						\
-	EM(fscache_page_cached,			"Cached ")		\
-	EM(fscache_page_inval,			"InvalPg")		\
-	EM(fscache_page_maybe_release,		"MayRels")		\
-	EM(fscache_page_uncache,		"Uncache")		\
-	EM(fscache_page_radix_clear_store,	"RxCStr ")		\
-	EM(fscache_page_radix_delete,		"RxDel  ")		\
-	EM(fscache_page_radix_insert,		"RxIns  ")		\
-	EM(fscache_page_radix_pend2store,	"RxP2S  ")		\
-	EM(fscache_page_radix_set_pend,		"RxSPend ")		\
-	EM(fscache_page_write,			"WritePg")		\
-	EM(fscache_page_write_end,		"EndPgWr")		\
-	EM(fscache_page_write_end_pend,		"EndPgWP")		\
-	EM(fscache_page_write_end_noc,		"EndPgNC")		\
-	E_(fscache_page_write_wait,		"WtOnWrt")
-
-#define fscache_op_traces						\
-	EM(fscache_op_cancel,			"Cancel1")		\
-	EM(fscache_op_cancel_all,		"CancelA")		\
-	EM(fscache_op_cancelled,		"Canclld")		\
-	EM(fscache_op_completed,		"Complet")		\
-	EM(fscache_op_enqueue_async,		"EnqAsyn")		\
-	EM(fscache_op_enqueue_mythread,		"EnqMyTh")		\
-	EM(fscache_op_gc,			"GC     ")		\
-	EM(fscache_op_init,			"Init   ")		\
-	EM(fscache_op_put,			"Put    ")		\
-	EM(fscache_op_run,			"Run    ")		\
-	EM(fscache_op_signal,			"Signal ")		\
-	EM(fscache_op_submit,			"Submit ")		\
-	EM(fscache_op_submit_ex,		"SubmitX")		\
-	E_(fscache_op_work,			"Work   ")
-
-#define fscache_page_op_traces						\
-	EM(fscache_page_op_alloc_one,		"Alloc1 ")		\
-	EM(fscache_page_op_attr_changed,	"AttrChg")		\
-	EM(fscache_page_op_check_consistency,	"CheckCn")		\
-	EM(fscache_page_op_invalidate,		"Inval  ")		\
-	EM(fscache_page_op_retr_multi,		"RetrMul")		\
-	EM(fscache_page_op_retr_one,		"Retr1  ")		\
-	E_(fscache_page_op_write_one,		"Write1 ")
-
-/*
- * Export enum symbols via userspace.
- */
-#undef EM
-#undef E_
-#define EM(a, b) TRACE_DEFINE_ENUM(a);
-#define E_(a, b) TRACE_DEFINE_ENUM(a);
-
-fscache_cookie_traces;
-
-/*
- * Now redefine the EM() and E_() macros to map the enums to the strings that
- * will be printed in the output.
- */
-#undef EM
-#undef E_
-#define EM(a, b)	{ a, b },
-#define E_(a, b)	{ a, b }
-
-
-TRACE_EVENT(fscache_cookie,
-	    TP_PROTO(unsigned int cookie_debug_id,
-		     int ref,
-		     enum fscache_cookie_trace where),
-
-	    TP_ARGS(cookie_debug_id, ref, where),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		cookie		)
-		    __field(enum fscache_cookie_trace,	where		)
-		    __field(int,			ref		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->cookie	= cookie_debug_id;
-		    __entry->where	= where;
-		    __entry->ref	= ref;
-			   ),
-
-	    TP_printk("%s c=%08x r=%d",
-		      __print_symbolic(__entry->where, fscache_cookie_traces),
-		      __entry->cookie, __entry->ref)
-	    );
-
-TRACE_EVENT(fscache_netfs,
-	    TP_PROTO(struct fscache_netfs *netfs),
-
-	    TP_ARGS(netfs),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		cookie		)
-		    __array(char,			name, 8		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->cookie		= netfs->primary_index->debug_id;
-		    strncpy(__entry->name, netfs->name, 8);
-		    __entry->name[7]		= 0;
-			   ),
-
-	    TP_printk("c=%08x n=%s",
-		      __entry->cookie, __entry->name)
-	    );
-
-TRACE_EVENT(fscache_acquire,
-	    TP_PROTO(struct fscache_cookie *cookie),
-
-	    TP_ARGS(cookie),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		cookie		)
-		    __field(unsigned int,		parent		)
-		    __array(char,			name, 8		)
-		    __field(int,			p_ref		)
-		    __field(int,			p_n_children	)
-		    __field(u8,				p_flags		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->cookie		= cookie->debug_id;
-		    __entry->parent		= cookie->parent->debug_id;
-		    __entry->p_ref		= refcount_read(&cookie->parent->ref);
-		    __entry->p_n_children	= atomic_read(&cookie->parent->n_children);
-		    __entry->p_flags		= cookie->parent->flags;
-		    memcpy(__entry->name, cookie->def->name, 8);
-		    __entry->name[7]		= 0;
-			   ),
-
-	    TP_printk("c=%08x p=%08x pr=%d pc=%d pf=%02x n=%s",
-		      __entry->cookie, __entry->parent, __entry->p_ref,
-		      __entry->p_n_children, __entry->p_flags, __entry->name)
-	    );
-
-TRACE_EVENT(fscache_relinquish,
-	    TP_PROTO(struct fscache_cookie *cookie, bool retire),
-
-	    TP_ARGS(cookie, retire),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		cookie		)
-		    __field(unsigned int,		parent		)
-		    __field(int,			ref		)
-		    __field(int,			n_children	)
-		    __field(int,			n_active	)
-		    __field(u8,				flags		)
-		    __field(bool,			retire		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->cookie	= cookie->debug_id;
-		    __entry->parent	= cookie->parent->debug_id;
-		    __entry->ref	= refcount_read(&cookie->ref);
-		    __entry->n_children	= atomic_read(&cookie->n_children);
-		    __entry->n_active	= atomic_read(&cookie->n_active);
-		    __entry->flags	= cookie->flags;
-		    __entry->retire	= retire;
-			   ),
-
-	    TP_printk("c=%08x r=%d p=%08x Nc=%d Na=%d f=%02x r=%u",
-		      __entry->cookie, __entry->ref,
-		      __entry->parent, __entry->n_children, __entry->n_active,
-		      __entry->flags, __entry->retire)
-	    );
-
-TRACE_EVENT(fscache_enable,
-	    TP_PROTO(struct fscache_cookie *cookie),
-
-	    TP_ARGS(cookie),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		cookie		)
-		    __field(int,			ref		)
-		    __field(int,			n_children	)
-		    __field(int,			n_active	)
-		    __field(u8,				flags		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->cookie	= cookie->debug_id;
-		    __entry->ref	= refcount_read(&cookie->ref);
-		    __entry->n_children	= atomic_read(&cookie->n_children);
-		    __entry->n_active	= atomic_read(&cookie->n_active);
-		    __entry->flags	= cookie->flags;
-			   ),
-
-	    TP_printk("c=%08x r=%d Nc=%d Na=%d f=%02x",
-		      __entry->cookie, __entry->ref,
-		      __entry->n_children, __entry->n_active, __entry->flags)
-	    );
-
-TRACE_EVENT(fscache_disable,
-	    TP_PROTO(struct fscache_cookie *cookie),
-
-	    TP_ARGS(cookie),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		cookie		)
-		    __field(int,			ref		)
-		    __field(int,			n_children	)
-		    __field(int,			n_active	)
-		    __field(u8,				flags		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->cookie	= cookie->debug_id;
-		    __entry->ref	= refcount_read(&cookie->ref);
-		    __entry->n_children	= atomic_read(&cookie->n_children);
-		    __entry->n_active	= atomic_read(&cookie->n_active);
-		    __entry->flags	= cookie->flags;
-			   ),
-
-	    TP_printk("c=%08x r=%d Nc=%d Na=%d f=%02x",
-		      __entry->cookie, __entry->ref,
-		      __entry->n_children, __entry->n_active, __entry->flags)
-	    );
-
-TRACE_EVENT(fscache_osm,
-	    TP_PROTO(struct fscache_object *object,
-		     const struct fscache_state *state,
-		     bool wait, bool oob, s8 event_num),
-
-	    TP_ARGS(object, state, wait, oob, event_num),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		cookie		)
-		    __field(unsigned int,		object		)
-		    __array(char,			state, 8	)
-		    __field(bool,			wait		)
-		    __field(bool,			oob		)
-		    __field(s8,				event_num	)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->cookie		= object->cookie->debug_id;
-		    __entry->object		= object->debug_id;
-		    __entry->wait		= wait;
-		    __entry->oob		= oob;
-		    __entry->event_num		= event_num;
-		    memcpy(__entry->state, state->short_name, 8);
-			   ),
-
-	    TP_printk("c=%08x o=%08d %s %s%sev=%d",
-		      __entry->cookie,
-		      __entry->object,
-		      __entry->state,
-		      __print_symbolic(__entry->wait,
-				       { true,  "WAIT" },
-				       { false, "WORK" }),
-		      __print_symbolic(__entry->oob,
-				       { true,  " OOB " },
-				       { false, " " }),
-		      __entry->event_num)
-	    );
-
-TRACE_EVENT(fscache_page,
-	    TP_PROTO(struct fscache_cookie *cookie, struct page *page,
-		     enum fscache_page_trace why),
-
-	    TP_ARGS(cookie, page, why),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		cookie		)
-		    __field(pgoff_t,			page		)
-		    __field(enum fscache_page_trace,	why		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->cookie		= cookie->debug_id;
-		    __entry->page		= page->index;
-		    __entry->why		= why;
-			   ),
-
-	    TP_printk("c=%08x %s pg=%lx",
-		      __entry->cookie,
-		      __print_symbolic(__entry->why, fscache_page_traces),
-		      __entry->page)
-	    );
-
-TRACE_EVENT(fscache_check_page,
-	    TP_PROTO(struct fscache_cookie *cookie, struct page *page,
-		     void *val, int n),
-
-	    TP_ARGS(cookie, page, val, n),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		cookie		)
-		    __field(void *,			page		)
-		    __field(void *,			val		)
-		    __field(int,			n		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->cookie		= cookie->debug_id;
-		    __entry->page		= page;
-		    __entry->val		= val;
-		    __entry->n			= n;
-			   ),
-
-	    TP_printk("c=%08x pg=%p val=%p n=%d",
-		      __entry->cookie, __entry->page, __entry->val, __entry->n)
-	    );
-
-TRACE_EVENT(fscache_wake_cookie,
-	    TP_PROTO(struct fscache_cookie *cookie),
-
-	    TP_ARGS(cookie),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		cookie		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->cookie		= cookie->debug_id;
-			   ),
-
-	    TP_printk("c=%08x", __entry->cookie)
-	    );
-
-TRACE_EVENT(fscache_op,
-	    TP_PROTO(struct fscache_cookie *cookie, struct fscache_operation *op,
-		     enum fscache_op_trace why),
-
-	    TP_ARGS(cookie, op, why),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		cookie		)
-		    __field(unsigned int,		op		)
-		    __field(enum fscache_op_trace,	why		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->cookie		= cookie ? cookie->debug_id : 0;
-		    __entry->op			= op->debug_id;
-		    __entry->why		= why;
-			   ),
-
-	    TP_printk("c=%08x op=%08x %s",
-		      __entry->cookie, __entry->op,
-		      __print_symbolic(__entry->why, fscache_op_traces))
-	    );
-
-TRACE_EVENT(fscache_page_op,
-	    TP_PROTO(struct fscache_cookie *cookie, struct page *page,
-		     struct fscache_operation *op, enum fscache_page_op_trace what),
-
-	    TP_ARGS(cookie, page, op, what),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		cookie		)
-		    __field(unsigned int,		op		)
-		    __field(pgoff_t,			page		)
-		    __field(enum fscache_page_op_trace,	what		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->cookie		= cookie->debug_id;
-		    __entry->page		= page ? page->index : 0;
-		    __entry->op			= op->debug_id;
-		    __entry->what		= what;
-			   ),
-
-	    TP_printk("c=%08x %s pg=%lx op=%08x",
-		      __entry->cookie,
-		      __print_symbolic(__entry->what, fscache_page_op_traces),
-		      __entry->page, __entry->op)
-	    );
-
-TRACE_EVENT(fscache_wrote_page,
-	    TP_PROTO(struct fscache_cookie *cookie, struct page *page,
-		     struct fscache_operation *op, int ret),
-
-	    TP_ARGS(cookie, page, op, ret),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		cookie		)
-		    __field(unsigned int,		op		)
-		    __field(pgoff_t,			page		)
-		    __field(int,			ret		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->cookie		= cookie->debug_id;
-		    __entry->page		= page->index;
-		    __entry->op			= op->debug_id;
-		    __entry->ret		= ret;
-			   ),
-
-	    TP_printk("c=%08x pg=%lx op=%08x ret=%d",
-		      __entry->cookie, __entry->page, __entry->op, __entry->ret)
-	    );
-
-TRACE_EVENT(fscache_gang_lookup,
-	    TP_PROTO(struct fscache_cookie *cookie, struct fscache_operation *op,
-		     void **results, int n, pgoff_t store_limit),
-
-	    TP_ARGS(cookie, op, results, n, store_limit),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		cookie		)
-		    __field(unsigned int,		op		)
-		    __field(pgoff_t,			results0	)
-		    __field(int,			n		)
-		    __field(pgoff_t,			store_limit	)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->cookie		= cookie->debug_id;
-		    __entry->op			= op->debug_id;
-		    __entry->results0		= results[0] ? ((struct page *)results[0])->index : (pgoff_t)-1;
-		    __entry->n			= n;
-		    __entry->store_limit	= store_limit;
-			   ),
-
-	    TP_printk("c=%08x op=%08x r0=%lx n=%d sl=%lx",
-		      __entry->cookie, __entry->op, __entry->results0, __entry->n,
-		      __entry->store_limit)
-	    );
-
-#endif /* _TRACE_FSCACHE_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
-- 
cgit v1.2.3


From 9e1aa6b8f484dde5ada1212092d20ea3f55c24e8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 12 Oct 2021 12:05:27 +0100
Subject: netfs: Display the netfs inode number in the netfs_read tracepoint

Display the netfs inode number in the netfs_read tracepoint so that this
can be used to correlate with the cachefiles_prep_read tracepoint.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819581097.215744.17476611915583897051.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906885903.143852.12229407815154182247.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967078164.1823006.15286989199782861123.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021487412.640689.7544388469390936443.stgit@warthog.procyon.org.uk/ # v4
---
 include/trace/events/netfs.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 4d470bffd9f1..e6f4ebbb4c69 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -135,6 +135,7 @@ TRACE_EVENT(netfs_read,
 		    __field(loff_t,			start		)
 		    __field(size_t,			len		)
 		    __field(enum netfs_read_trace,	what		)
+		    __field(unsigned int,		netfs_inode	)
 			     ),
 
 	    TP_fast_assign(
@@ -143,12 +144,14 @@ TRACE_EVENT(netfs_read,
 		    __entry->start	= start;
 		    __entry->len	= len;
 		    __entry->what	= what;
+		    __entry->netfs_inode = rreq->inode->i_ino;
 			   ),
 
-	    TP_printk("R=%08x %s c=%08x s=%llx %zx",
+	    TP_printk("R=%08x %s c=%08x ni=%x s=%llx %zx",
 		      __entry->rreq,
 		      __print_symbolic(__entry->what, netfs_read_traces),
 		      __entry->cookie,
+		      __entry->netfs_inode,
 		      __entry->start, __entry->len)
 	    );
 
-- 
cgit v1.2.3


From a39c41b853ee51f4dcd19f5556f860ae8e2f23d3 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 14:30:37 +0100
Subject: netfs: Pass a flag to ->prepare_write() to say if there's no alloc'd
 space

Pass a flag to ->prepare_write() to indicate if there's definitely no
space allocated in the cache yet (for instance if we've already checked as
we were asked to do a read).

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819583123.215744.12783808230464471417.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906886835.143852.6689886781122679769.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967079100.1823006.12889542712309574359.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021489334.640689.3131206613015409076.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/netfs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index ca0683b9e3d1..1ea22fc48818 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -232,7 +232,8 @@ struct netfs_cache_ops {
 	 * actually do.
 	 */
 	int (*prepare_write)(struct netfs_cache_resources *cres,
-			     loff_t *_start, size_t *_len, loff_t i_size);
+			     loff_t *_start, size_t *_len, loff_t i_size,
+			     bool no_space_allocated_yet);
 };
 
 struct readahead_control;
-- 
cgit v1.2.3


From 1e1236b841166f1d2daf36fdf6bb3e656bc5f5ca Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 14:34:41 +0100
Subject: fscache: Introduce new driver

Introduce basic skeleton of the new, rewritten fscache driver.

Changes
=======
ver #3:
 - Use remove_proc_subtree(), not remove_proc_entry() to remove a populated
   dir.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819584034.215744.4290533472390439030.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906887770.143852.3577888294989185666.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967080039.1823006.5702921801104057922.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021491014.640689.4292699878317589512.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache-cache.h  |  2 ++
 include/linux/fscache.h        |  6 +++++-
 include/trace/events/fscache.h | 49 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 1 deletion(-)
 create mode 100644 include/trace/events/fscache.h

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 47f21a53ac4b..d6910a913918 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -16,4 +16,6 @@
 
 #include <linux/fscache.h>
 
+extern struct workqueue_struct *fscache_wq;
+
 #endif /* _LINUX_FSCACHE_CACHE_H */
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 0364a4ca16f6..1cf90c252aac 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 /* General filesystem caching interface
  *
- * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * NOTE!!! See:
@@ -18,9 +18,13 @@
 #include <linux/netfs.h>
 
 #if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE)
+#define __fscache_available (1)
+#define fscache_available() (1)
 #define fscache_cookie_valid(cookie) (cookie)
 #define fscache_cookie_enabled(cookie) (cookie)
 #else
+#define __fscache_available (0)
+#define fscache_available() (0)
 #define fscache_cookie_valid(cookie) (0)
 #define fscache_cookie_enabled(cookie) (0)
 #endif
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
new file mode 100644
index 000000000000..fe214c5cc87f
--- /dev/null
+++ b/include/trace/events/fscache.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* FS-Cache tracepoints
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM fscache
+
+#if !defined(_TRACE_FSCACHE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FSCACHE_H
+
+#include <linux/fscache.h>
+#include <linux/tracepoint.h>
+
+/*
+ * Define enums for tracing information.
+ */
+#ifndef __FSCACHE_DECLARE_TRACE_ENUMS_ONCE_ONLY
+#define __FSCACHE_DECLARE_TRACE_ENUMS_ONCE_ONLY
+
+#endif
+
+/*
+ * Declare tracing information enums and their string mappings for display.
+ */
+
+/*
+ * Export enum symbols via userspace.
+ */
+#undef EM
+#undef E_
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define E_(a, b) TRACE_DEFINE_ENUM(a);
+
+/*
+ * Now redefine the EM() and E_() macros to map the enums to the strings that
+ * will be printed in the output.
+ */
+#undef EM
+#undef E_
+#define EM(a, b)	{ a, b },
+#define E_(a, b)	{ a, b }
+
+
+#endif /* _TRACE_FSCACHE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
cgit v1.2.3


From 9549332df4ed4e761a1d41c83f2c25d28bb22431 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 15:00:26 +0100
Subject: fscache: Implement cache registration

Implement a register of caches and provide functions to manage it.

Two functions are provided for the cache backend to use:

 (1) Acquire a cache cookie:

	struct fscache_cache *fscache_acquire_cache(const char *name)

     This gets the cache cookie for a cache of the specified name and moves
     it to the preparation state.  If a nameless cache cookie exists, that
     will be given this name and used.

 (2) Relinquish a cache cookie:

	void fscache_relinquish_cache(struct fscache_cache *cache);

     This relinquishes a cache cookie, cleans it and makes it available if
     it's still referenced by a network filesystem.

Note that network filesystems don't deal with cache cookies directly, but
rather go straight to the volume registration.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819587157.215744.13523139317322503286.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906889665.143852.10378009165231294456.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967085081.1823006.2218944206363626210.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021494847.640689.10109692261640524343.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache-cache.h  | 34 +++++++++++++++++++++++++++++++++
 include/trace/events/fscache.h | 43 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index d6910a913918..18cd5c9877bb 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -16,6 +16,40 @@
 
 #include <linux/fscache.h>
 
+enum fscache_cache_trace;
+enum fscache_access_trace;
+
+enum fscache_cache_state {
+	FSCACHE_CACHE_IS_NOT_PRESENT,	/* No cache is present for this name */
+	FSCACHE_CACHE_IS_PREPARING,	/* A cache is preparing to come live */
+	FSCACHE_CACHE_IS_ACTIVE,	/* Attached cache is active and can be used */
+	FSCACHE_CACHE_GOT_IOERROR,	/* Attached cache stopped on I/O error */
+	FSCACHE_CACHE_IS_WITHDRAWN,	/* Attached cache is being withdrawn */
+#define NR__FSCACHE_CACHE_STATE (FSCACHE_CACHE_IS_WITHDRAWN + 1)
+};
+
+/*
+ * Cache cookie.
+ */
+struct fscache_cache {
+	struct list_head	cache_link;	/* Link in cache list */
+	void			*cache_priv;	/* Private cache data (or NULL) */
+	refcount_t		ref;
+	atomic_t		n_volumes;	/* Number of active volumes; */
+	atomic_t		n_accesses;	/* Number of in-progress accesses on the cache */
+	atomic_t		object_count;	/* no. of live objects in this cache */
+	unsigned int		debug_id;
+	enum fscache_cache_state state;
+	char			*name;
+};
+
 extern struct workqueue_struct *fscache_wq;
 
+/*
+ * out-of-line cache backend functions
+ */
+extern struct rw_semaphore fscache_addremove_sem;
+extern struct fscache_cache *fscache_acquire_cache(const char *name);
+extern void fscache_relinquish_cache(struct fscache_cache *cache);
+
 #endif /* _LINUX_FSCACHE_CACHE_H */
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index fe214c5cc87f..3b8e0597b2c1 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -19,11 +19,27 @@
 #ifndef __FSCACHE_DECLARE_TRACE_ENUMS_ONCE_ONLY
 #define __FSCACHE_DECLARE_TRACE_ENUMS_ONCE_ONLY
 
+enum fscache_cache_trace {
+	fscache_cache_collision,
+	fscache_cache_get_acquire,
+	fscache_cache_new_acquire,
+	fscache_cache_put_cache,
+	fscache_cache_put_prep_failed,
+	fscache_cache_put_relinquish,
+};
+
 #endif
 
 /*
  * Declare tracing information enums and their string mappings for display.
  */
+#define fscache_cache_traces						\
+	EM(fscache_cache_collision,		"*COLLIDE*")		\
+	EM(fscache_cache_get_acquire,		"GET acq  ")		\
+	EM(fscache_cache_new_acquire,		"NEW acq  ")		\
+	EM(fscache_cache_put_cache,		"PUT cache")		\
+	EM(fscache_cache_put_prep_failed,	"PUT pfail")		\
+	E_(fscache_cache_put_relinquish,	"PUT relnq")
 
 /*
  * Export enum symbols via userspace.
@@ -33,6 +49,8 @@
 #define EM(a, b) TRACE_DEFINE_ENUM(a);
 #define E_(a, b) TRACE_DEFINE_ENUM(a);
 
+fscache_cache_traces;
+
 /*
  * Now redefine the EM() and E_() macros to map the enums to the strings that
  * will be printed in the output.
@@ -43,6 +61,31 @@
 #define E_(a, b)	{ a, b }
 
 
+TRACE_EVENT(fscache_cache,
+	    TP_PROTO(unsigned int cache_debug_id,
+		     int usage,
+		     enum fscache_cache_trace where),
+
+	    TP_ARGS(cache_debug_id, usage, where),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		cache		)
+		    __field(int,			usage		)
+		    __field(enum fscache_cache_trace,	where		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cache	= cache_debug_id;
+		    __entry->usage	= usage;
+		    __entry->where	= where;
+			   ),
+
+	    TP_printk("C=%08x %s r=%d",
+		      __entry->cache,
+		      __print_symbolic(__entry->where, fscache_cache_traces),
+		      __entry->usage)
+	    );
+
 #endif /* _TRACE_FSCACHE_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 62ab63352350e881ae693a8236b35d7d0516c78b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 15:26:17 +0100
Subject: fscache: Implement volume registration

Add functions to the fscache API to allow volumes to be acquired and
relinquished by the network filesystem.  A volume is an index of data
storage cache objects.  A volume is represented by a volume cookie in the
API.  A filesystem would typically create a volume for a superblock and
then create per-inode cookies within it.

To request a volume, the filesystem calls:

	struct fscache_volume *
	fscache_acquire_volume(const char *volume_key,
			       const char *cache_name,
			       const void *coherency_data,
			       size_t coherency_len)

The volume_key is a printable string used to match the volume in the cache.
It should not contain any '/' characters.  For AFS, for example, this would
be "afs,<cellname>,<volume_id>", e.g. "afs,example.com,523001".

The cache_name can be NULL, but if not it should be a string indicating the
name of the cache to use if there's more than one available.

The coherency data, if given, is an arbitrarily-sized blob that's attached
to the volume and is compared when the volume is looked up.  If it doesn't
match, the old volume is judged to be out of date and it and everything
within it is discarded.

Acquiring a volume twice concurrently is disallowed, though the function
will wait if an old volume cookie is being relinquishing.


When a network filesystem has finished with a volume, it should return the
volume cookie by calling:

	void
	fscache_relinquish_volume(struct fscache_volume *volume,
				  const void *coherency_data,
				  bool invalidate)

If invalidate is true, the entire volume will be discarded; if false, the
volume will be synced and the coherency data will be updated.

Changes
=======
ver #4:
 - Removed an extraneous param from kdoc on fscache_relinquish_volume()[3].

ver #3:
 - fscache_hash()'s size parameter is now in bytes.  Use __le32 as the unit
   to round up to.
 - When comparing cookies, simply see if the attributes are the same rather
   than subtracting them to produce a strcmp-style return[2].
 - Make the coherency data an arbitrary blob rather than a u64, but don't
   store it for the moment.

ver #2:
 - Fix error check[1].
 - Make a fscache_acquire_volume() return errors, including EBUSY if a
   conflicting volume cookie already exists.  No error is printed now -
   that's left to the netfs.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/20211203095608.GC2480@kili/ [1]
Link: https://lore.kernel.org/r/CAHk-=whtkzB446+hX0zdLsdcUJsJ=8_-0S1mE_R+YurThfUbLA@mail.gmail.com/ [2]
Link: https://lore.kernel.org/r/20211220224646.30e8205c@canb.auug.org.au/ [3]
Link: https://lore.kernel.org/r/163819588944.215744.1629085755564865996.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906890630.143852.13972180614535611154.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967086836.1823006.8191672796841981763.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021495816.640689.4403156093668590217.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache.h        | 84 ++++++++++++++++++++++++++++++++++++++++++
 include/trace/events/fscache.h | 61 +++++++++++++++++++++++++++++-
 2 files changed, 144 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 1cf90c252aac..131a741a6652 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -20,13 +20,97 @@
 #if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE)
 #define __fscache_available (1)
 #define fscache_available() (1)
+#define fscache_volume_valid(volume) (volume)
 #define fscache_cookie_valid(cookie) (cookie)
 #define fscache_cookie_enabled(cookie) (cookie)
 #else
 #define __fscache_available (0)
 #define fscache_available() (0)
+#define fscache_volume_valid(volume) (0)
 #define fscache_cookie_valid(cookie) (0)
 #define fscache_cookie_enabled(cookie) (0)
 #endif
 
+/*
+ * Volume representation cookie.
+ */
+struct fscache_volume {
+	refcount_t			ref;
+	atomic_t			n_cookies;	/* Number of data cookies in volume */
+	atomic_t			n_accesses;	/* Number of cache accesses in progress */
+	unsigned int			debug_id;
+	unsigned int			key_hash;	/* Hash of key string */
+	char				*key;		/* Volume ID, eg. "afs@example.com@1234" */
+	struct list_head		proc_link;	/* Link in /proc/fs/fscache/volumes */
+	struct hlist_bl_node		hash_link;	/* Link in hash table */
+	struct work_struct		work;
+	struct fscache_cache		*cache;		/* The cache in which this resides */
+	void				*cache_priv;	/* Cache private data */
+	spinlock_t			lock;
+	unsigned long			flags;
+#define FSCACHE_VOLUME_RELINQUISHED	0	/* Volume is being cleaned up */
+#define FSCACHE_VOLUME_INVALIDATE	1	/* Volume was invalidated */
+#define FSCACHE_VOLUME_COLLIDED_WITH	2	/* Volume was collided with */
+#define FSCACHE_VOLUME_ACQUIRE_PENDING	3	/* Volume is waiting to complete acquisition */
+#define FSCACHE_VOLUME_CREATING		4	/* Volume is being created on disk */
+};
+
+/*
+ * slow-path functions for when there is actually caching available, and the
+ * netfs does actually have a valid token
+ * - these are not to be called directly
+ * - these are undefined symbols when FS-Cache is not configured and the
+ *   optimiser takes care of not using them
+ */
+extern struct fscache_volume *__fscache_acquire_volume(const char *, const char *,
+						       const void *, size_t);
+extern void __fscache_relinquish_volume(struct fscache_volume *, const void *, bool);
+
+/**
+ * fscache_acquire_volume - Register a volume as desiring caching services
+ * @volume_key: An identification string for the volume
+ * @cache_name: The name of the cache to use (or NULL for the default)
+ * @coherency_data: Piece of arbitrary coherency data to check (or NULL)
+ * @coherency_len: The size of the coherency data
+ *
+ * Register a volume as desiring caching services if they're available.  The
+ * caller must provide an identifier for the volume and may also indicate which
+ * cache it should be in.  If a preexisting volume entry is found in the cache,
+ * the coherency data must match otherwise the entry will be invalidated.
+ *
+ * Returns a cookie pointer on success, -ENOMEM if out of memory or -EBUSY if a
+ * cache volume of that name is already acquired.  Note that "NULL" is a valid
+ * cookie pointer and can be returned if caching is refused.
+ */
+static inline
+struct fscache_volume *fscache_acquire_volume(const char *volume_key,
+					      const char *cache_name,
+					      const void *coherency_data,
+					      size_t coherency_len)
+{
+	if (!fscache_available())
+		return NULL;
+	return __fscache_acquire_volume(volume_key, cache_name,
+					coherency_data, coherency_len);
+}
+
+/**
+ * fscache_relinquish_volume - Cease caching a volume
+ * @volume: The volume cookie
+ * @coherency_data: Piece of arbitrary coherency data to set (or NULL)
+ * @invalidate: True if the volume should be invalidated
+ *
+ * Indicate that a filesystem no longer desires caching services for a volume.
+ * The caller must have relinquished all file cookies prior to calling this.
+ * The stored coherency data is updated.
+ */
+static inline
+void fscache_relinquish_volume(struct fscache_volume *volume,
+			       const void *coherency_data,
+			       bool invalidate)
+{
+	if (fscache_volume_valid(volume))
+		__fscache_relinquish_volume(volume, coherency_data, invalidate);
+}
+
 #endif /* _LINUX_FSCACHE_H */
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index 3b8e0597b2c1..eeb3e7d88e20 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -23,9 +23,26 @@ enum fscache_cache_trace {
 	fscache_cache_collision,
 	fscache_cache_get_acquire,
 	fscache_cache_new_acquire,
+	fscache_cache_put_alloc_volume,
 	fscache_cache_put_cache,
 	fscache_cache_put_prep_failed,
 	fscache_cache_put_relinquish,
+	fscache_cache_put_volume,
+};
+
+enum fscache_volume_trace {
+	fscache_volume_collision,
+	fscache_volume_get_cookie,
+	fscache_volume_get_create_work,
+	fscache_volume_get_hash_collision,
+	fscache_volume_free,
+	fscache_volume_new_acquire,
+	fscache_volume_put_cookie,
+	fscache_volume_put_create_work,
+	fscache_volume_put_hash_collision,
+	fscache_volume_put_relinquish,
+	fscache_volume_see_create_work,
+	fscache_volume_see_hash_wake,
 };
 
 #endif
@@ -37,9 +54,25 @@ enum fscache_cache_trace {
 	EM(fscache_cache_collision,		"*COLLIDE*")		\
 	EM(fscache_cache_get_acquire,		"GET acq  ")		\
 	EM(fscache_cache_new_acquire,		"NEW acq  ")		\
+	EM(fscache_cache_put_alloc_volume,	"PUT alvol")		\
 	EM(fscache_cache_put_cache,		"PUT cache")		\
 	EM(fscache_cache_put_prep_failed,	"PUT pfail")		\
-	E_(fscache_cache_put_relinquish,	"PUT relnq")
+	EM(fscache_cache_put_relinquish,	"PUT relnq")		\
+	E_(fscache_cache_put_volume,		"PUT vol  ")
+
+#define fscache_volume_traces						\
+	EM(fscache_volume_collision,		"*COLLIDE*")		\
+	EM(fscache_volume_get_cookie,		"GET cook ")		\
+	EM(fscache_volume_get_create_work,	"GET creat")		\
+	EM(fscache_volume_get_hash_collision,	"GET hcoll")		\
+	EM(fscache_volume_free,			"FREE     ")		\
+	EM(fscache_volume_new_acquire,		"NEW acq  ")		\
+	EM(fscache_volume_put_cookie,		"PUT cook ")		\
+	EM(fscache_volume_put_create_work,	"PUT creat")		\
+	EM(fscache_volume_put_hash_collision,	"PUT hcoll")		\
+	EM(fscache_volume_put_relinquish,	"PUT relnq")		\
+	EM(fscache_volume_see_create_work,	"SEE creat")		\
+	E_(fscache_volume_see_hash_wake,	"SEE hwake")
 
 /*
  * Export enum symbols via userspace.
@@ -50,6 +83,7 @@ enum fscache_cache_trace {
 #define E_(a, b) TRACE_DEFINE_ENUM(a);
 
 fscache_cache_traces;
+fscache_volume_traces;
 
 /*
  * Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -86,6 +120,31 @@ TRACE_EVENT(fscache_cache,
 		      __entry->usage)
 	    );
 
+TRACE_EVENT(fscache_volume,
+	    TP_PROTO(unsigned int volume_debug_id,
+		     int usage,
+		     enum fscache_volume_trace where),
+
+	    TP_ARGS(volume_debug_id, usage, where),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		volume		)
+		    __field(int,			usage		)
+		    __field(enum fscache_volume_trace,	where		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->volume	= volume_debug_id;
+		    __entry->usage	= usage;
+		    __entry->where	= where;
+			   ),
+
+	    TP_printk("V=%08x %s u=%d",
+		      __entry->volume,
+		      __print_symbolic(__entry->where, fscache_volume_traces),
+		      __entry->usage)
+	    );
+
 #endif /* _TRACE_FSCACHE_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 7f3283aba39a0f395700c3b5defa4ec49d9914b3 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 15:53:34 +0100
Subject: fscache: Implement cookie registration

Add functions to the fscache API to allow data file cookies to be acquired
and relinquished by the network filesystem.  It is intended that the
filesystem will create such cookies per-inode under a volume.

To request a cookie, the filesystem should call:

	struct fscache_cookie *
	fscache_acquire_cookie(struct fscache_volume *volume,
			       u8 advice,
			       const void *index_key,
			       size_t index_key_len,
			       const void *aux_data,
			       size_t aux_data_len,
			       loff_t object_size)


The filesystem must first have created a volume cookie, which is passed in
here.  If it passes in NULL then the function will just return a NULL
cookie.

A binary key should be passed in index_key and is of size index_key_len.
This is saved in the cookie and is used to locate the associated data in
the cache.

A coherency data buffer of size aux_data_len will be allocated and
initialised from the buffer pointed to by aux_data.  This is used to
validate cache objects when they're opened and is stored on disk with them
when they're committed.  The data is stored in the cookie and will be
updateable by various functions in later patches.

The object_size must also be given.  This is also used to perform a
coherency check and to size the backing storage appropriately.

This function disallows a cookie from being acquired twice in parallel,
though it will cause the second user to wait if the first is busy
relinquishing its cookie.


When a network filesystem has finished with a cookie, it should call:

	void
	fscache_relinquish_cookie(struct fscache_volume *volume,
				  bool retire)

If retire is true, any backing data will be discarded immediately.

Changes
=======
ver #3:
 - fscache_hash()'s size parameter is now in bytes.  Use __le32 as the unit
   to round up to.
 - When comparing cookies, simply see if the attributes are the same rather
   than subtracting them to produce a strcmp-style return[1].
 - Add a check to see if the cookie is still hashed at the point of
   freeing.

ver #2:
 - Don't hold n_accesses elevated whilst cache is bound to a cookie, but
   rather add a flag that prevents the state machine from being queued when
   n_accesses reaches 0.
 - Remove the unused cookie pointer field from the fscache_acquire
   tracepoint.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/CAHk-=whtkzB446+hX0zdLsdcUJsJ=8_-0S1mE_R+YurThfUbLA@mail.gmail.com/ [1]
Link: https://lore.kernel.org/r/163819590658.215744.14934902514281054323.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906891983.143852.6219772337558577395.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967088507.1823006.12659006350221417165.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021498432.640689.12743483856927722772.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache-cache.h  |  22 +++++++
 include/linux/fscache.h        | 134 +++++++++++++++++++++++++++++++++++++++++
 include/trace/events/fscache.h | 111 ++++++++++++++++++++++++++++++++++
 3 files changed, 267 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 18cd5c9877bb..c4355b888c91 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -17,6 +17,7 @@
 #include <linux/fscache.h>
 
 enum fscache_cache_trace;
+enum fscache_cookie_trace;
 enum fscache_access_trace;
 
 enum fscache_cache_state {
@@ -52,4 +53,25 @@ extern struct rw_semaphore fscache_addremove_sem;
 extern struct fscache_cache *fscache_acquire_cache(const char *name);
 extern void fscache_relinquish_cache(struct fscache_cache *cache);
 
+extern struct fscache_cookie *fscache_get_cookie(struct fscache_cookie *cookie,
+						 enum fscache_cookie_trace where);
+extern void fscache_put_cookie(struct fscache_cookie *cookie,
+			       enum fscache_cookie_trace where);
+extern void fscache_set_cookie_state(struct fscache_cookie *cookie,
+				     enum fscache_cookie_state state);
+
+/**
+ * fscache_get_key - Get a pointer to the cookie key
+ * @cookie: The cookie to query
+ *
+ * Return a pointer to the where a cookie's key is stored.
+ */
+static inline void *fscache_get_key(struct fscache_cookie *cookie)
+{
+	if (cookie->key_len <= sizeof(cookie->inline_key))
+		return cookie->inline_key;
+	else
+		return cookie->key;
+}
+
 #endif /* _LINUX_FSCACHE_CACHE_H */
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 131a741a6652..4450d17c11e8 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -31,6 +31,27 @@
 #define fscache_cookie_enabled(cookie) (0)
 #endif
 
+struct fscache_cookie;
+
+#define FSCACHE_ADV_SINGLE_CHUNK	0x01 /* The object is a single chunk of data */
+#define FSCACHE_ADV_WRITE_CACHE		0x00 /* Do cache if written to locally */
+#define FSCACHE_ADV_WRITE_NOCACHE	0x02 /* Don't cache if written to locally */
+
+/*
+ * Data object state.
+ */
+enum fscache_cookie_state {
+	FSCACHE_COOKIE_STATE_QUIESCENT,		/* The cookie is uncached */
+	FSCACHE_COOKIE_STATE_LOOKING_UP,	/* The cache object is being looked up */
+	FSCACHE_COOKIE_STATE_CREATING,		/* The cache object is being created */
+	FSCACHE_COOKIE_STATE_ACTIVE,		/* The cache is active, readable and writable */
+	FSCACHE_COOKIE_STATE_FAILED,		/* The cache failed, withdraw to clear */
+	FSCACHE_COOKIE_STATE_WITHDRAWING,	/* The cookie is being withdrawn */
+	FSCACHE_COOKIE_STATE_RELINQUISHING,	/* The cookie is being relinquished */
+	FSCACHE_COOKIE_STATE_DROPPED,		/* The cookie has been dropped */
+#define FSCACHE_COOKIE_STATE__NR (FSCACHE_COOKIE_STATE_DROPPED + 1)
+} __attribute__((mode(byte)));
+
 /*
  * Volume representation cookie.
  */
@@ -55,6 +76,60 @@ struct fscache_volume {
 #define FSCACHE_VOLUME_CREATING		4	/* Volume is being created on disk */
 };
 
+/*
+ * Data file representation cookie.
+ * - a file will only appear in one cache
+ * - a request to cache a file may or may not be honoured, subject to
+ *   constraints such as disk space
+ * - indices are created on disk just-in-time
+ */
+struct fscache_cookie {
+	refcount_t			ref;
+	atomic_t			n_active;	/* number of active users of cookie */
+	atomic_t			n_accesses;	/* Number of cache accesses in progress */
+	unsigned int			debug_id;
+	unsigned int			inval_counter;	/* Number of invalidations made */
+	spinlock_t			lock;
+	struct fscache_volume		*volume;	/* Parent volume of this file. */
+	void				*cache_priv;	/* Cache-side representation */
+	struct hlist_bl_node		hash_link;	/* Link in hash table */
+	struct list_head		proc_link;	/* Link in proc list */
+	struct list_head		commit_link;	/* Link in commit queue */
+	struct work_struct		work;		/* Commit/relinq/withdraw work */
+	loff_t				object_size;	/* Size of the netfs object */
+	unsigned long			unused_at;	/* Time at which unused (jiffies) */
+	unsigned long			flags;
+#define FSCACHE_COOKIE_RELINQUISHED	0		/* T if cookie has been relinquished */
+#define FSCACHE_COOKIE_RETIRED		1		/* T if this cookie has retired on relinq */
+#define FSCACHE_COOKIE_IS_CACHING	2		/* T if this cookie is cached */
+#define FSCACHE_COOKIE_NO_DATA_TO_READ	3		/* T if this cookie has nothing to read */
+#define FSCACHE_COOKIE_NEEDS_UPDATE	4		/* T if attrs have been updated */
+#define FSCACHE_COOKIE_HAS_BEEN_CACHED	5		/* T if cookie needs withdraw-on-relinq */
+#define FSCACHE_COOKIE_DISABLED		6		/* T if cookie has been disabled */
+#define FSCACHE_COOKIE_LOCAL_WRITE	7		/* T if cookie has been modified locally */
+#define FSCACHE_COOKIE_NO_ACCESS_WAKE	8		/* T if no wake when n_accesses goes 0 */
+#define FSCACHE_COOKIE_DO_RELINQUISH	9		/* T if this cookie needs relinquishment */
+#define FSCACHE_COOKIE_DO_WITHDRAW	10		/* T if this cookie needs withdrawing */
+#define FSCACHE_COOKIE_DO_LRU_DISCARD	11		/* T if this cookie needs LRU discard */
+#define FSCACHE_COOKIE_DO_PREP_TO_WRITE	12		/* T if cookie needs write preparation */
+#define FSCACHE_COOKIE_HAVE_DATA	13		/* T if this cookie has data stored */
+#define FSCACHE_COOKIE_IS_HASHED	14		/* T if this cookie is hashed */
+
+	enum fscache_cookie_state	state;
+	u8				advice;		/* FSCACHE_ADV_* */
+	u8				key_len;	/* Length of index key */
+	u8				aux_len;	/* Length of auxiliary data */
+	u32				key_hash;	/* Hash of volume, key, len */
+	union {
+		void			*key;		/* Index key */
+		u8			inline_key[16];	/* - If the key is short enough */
+	};
+	union {
+		void			*aux;		/* Auxiliary data */
+		u8			inline_aux[8];	/* - If the aux data is short enough */
+	};
+};
+
 /*
  * slow-path functions for when there is actually caching available, and the
  * netfs does actually have a valid token
@@ -66,6 +141,14 @@ extern struct fscache_volume *__fscache_acquire_volume(const char *, const char
 						       const void *, size_t);
 extern void __fscache_relinquish_volume(struct fscache_volume *, const void *, bool);
 
+extern struct fscache_cookie *__fscache_acquire_cookie(
+	struct fscache_volume *,
+	u8,
+	const void *, size_t,
+	const void *, size_t,
+	loff_t);
+extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool);
+
 /**
  * fscache_acquire_volume - Register a volume as desiring caching services
  * @volume_key: An identification string for the volume
@@ -113,4 +196,55 @@ void fscache_relinquish_volume(struct fscache_volume *volume,
 		__fscache_relinquish_volume(volume, coherency_data, invalidate);
 }
 
+/**
+ * fscache_acquire_cookie - Acquire a cookie to represent a cache object
+ * @volume: The volume in which to locate/create this cookie
+ * @advice: Advice flags (FSCACHE_COOKIE_ADV_*)
+ * @index_key: The index key for this cookie
+ * @index_key_len: Size of the index key
+ * @aux_data: The auxiliary data for the cookie (may be NULL)
+ * @aux_data_len: Size of the auxiliary data buffer
+ * @object_size: The initial size of object
+ *
+ * Acquire a cookie to represent a data file within the given cache volume.
+ *
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
+ * description.
+ */
+static inline
+struct fscache_cookie *fscache_acquire_cookie(struct fscache_volume *volume,
+					      u8 advice,
+					      const void *index_key,
+					      size_t index_key_len,
+					      const void *aux_data,
+					      size_t aux_data_len,
+					      loff_t object_size)
+{
+	if (!fscache_volume_valid(volume))
+		return NULL;
+	return __fscache_acquire_cookie(volume, advice,
+					index_key, index_key_len,
+					aux_data, aux_data_len,
+					object_size);
+}
+
+/**
+ * fscache_relinquish_cookie - Return the cookie to the cache, maybe discarding
+ * it
+ * @cookie: The cookie being returned
+ * @retire: True if the cache object the cookie represents is to be discarded
+ *
+ * This function returns a cookie to the cache, forcibly discarding the
+ * associated cache object if retire is set to true.
+ *
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
+ * description.
+ */
+static inline
+void fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
+{
+	if (fscache_cookie_valid(cookie))
+		__fscache_relinquish_cookie(cookie, retire);
+}
+
 #endif /* _LINUX_FSCACHE_H */
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index eeb3e7d88e20..9286e1c4b2ac 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -45,6 +45,23 @@ enum fscache_volume_trace {
 	fscache_volume_see_hash_wake,
 };
 
+enum fscache_cookie_trace {
+	fscache_cookie_collision,
+	fscache_cookie_discard,
+	fscache_cookie_get_end_access,
+	fscache_cookie_get_hash_collision,
+	fscache_cookie_new_acquire,
+	fscache_cookie_put_hash_collision,
+	fscache_cookie_put_over_queued,
+	fscache_cookie_put_relinquish,
+	fscache_cookie_put_withdrawn,
+	fscache_cookie_put_work,
+	fscache_cookie_see_active,
+	fscache_cookie_see_relinquish,
+	fscache_cookie_see_withdraw,
+	fscache_cookie_see_work,
+};
+
 #endif
 
 /*
@@ -74,6 +91,22 @@ enum fscache_volume_trace {
 	EM(fscache_volume_see_create_work,	"SEE creat")		\
 	E_(fscache_volume_see_hash_wake,	"SEE hwake")
 
+#define fscache_cookie_traces						\
+	EM(fscache_cookie_collision,		"*COLLIDE*")		\
+	EM(fscache_cookie_discard,		"DISCARD  ")		\
+	EM(fscache_cookie_get_hash_collision,	"GET hcoll")		\
+	EM(fscache_cookie_get_end_access,	"GQ  endac")		\
+	EM(fscache_cookie_new_acquire,		"NEW acq  ")		\
+	EM(fscache_cookie_put_hash_collision,	"PUT hcoll")		\
+	EM(fscache_cookie_put_over_queued,	"PQ  overq")		\
+	EM(fscache_cookie_put_relinquish,	"PUT relnq")		\
+	EM(fscache_cookie_put_withdrawn,	"PUT wthdn")		\
+	EM(fscache_cookie_put_work,		"PQ  work ")		\
+	EM(fscache_cookie_see_active,		"-   activ")		\
+	EM(fscache_cookie_see_relinquish,	"-   x-rlq")		\
+	EM(fscache_cookie_see_withdraw,		"-   x-wth")		\
+	E_(fscache_cookie_see_work,		"-   work ")
+
 /*
  * Export enum symbols via userspace.
  */
@@ -84,6 +117,7 @@ enum fscache_volume_trace {
 
 fscache_cache_traces;
 fscache_volume_traces;
+fscache_cookie_traces;
 
 /*
  * Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -145,6 +179,83 @@ TRACE_EVENT(fscache_volume,
 		      __entry->usage)
 	    );
 
+TRACE_EVENT(fscache_cookie,
+	    TP_PROTO(unsigned int cookie_debug_id,
+		     int ref,
+		     enum fscache_cookie_trace where),
+
+	    TP_ARGS(cookie_debug_id, ref, where),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		cookie		)
+		    __field(int,			ref		)
+		    __field(enum fscache_cookie_trace,	where		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie	= cookie_debug_id;
+		    __entry->ref	= ref;
+		    __entry->where	= where;
+			   ),
+
+	    TP_printk("c=%08x %s r=%d",
+		      __entry->cookie,
+		      __print_symbolic(__entry->where, fscache_cookie_traces),
+		      __entry->ref)
+	    );
+
+TRACE_EVENT(fscache_acquire,
+	    TP_PROTO(struct fscache_cookie *cookie),
+
+	    TP_ARGS(cookie),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		cookie		)
+		    __field(unsigned int,		volume		)
+		    __field(int,			v_ref		)
+		    __field(int,			v_n_cookies	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie		= cookie->debug_id;
+		    __entry->volume		= cookie->volume->debug_id;
+		    __entry->v_ref		= refcount_read(&cookie->volume->ref);
+		    __entry->v_n_cookies	= atomic_read(&cookie->volume->n_cookies);
+			   ),
+
+	    TP_printk("c=%08x V=%08x vr=%d vc=%d",
+		      __entry->cookie,
+		      __entry->volume, __entry->v_ref, __entry->v_n_cookies)
+	    );
+
+TRACE_EVENT(fscache_relinquish,
+	    TP_PROTO(struct fscache_cookie *cookie, bool retire),
+
+	    TP_ARGS(cookie, retire),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		cookie		)
+		    __field(unsigned int,		volume		)
+		    __field(int,			ref		)
+		    __field(int,			n_active	)
+		    __field(u8,				flags		)
+		    __field(bool,			retire		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie	= cookie->debug_id;
+		    __entry->volume	= cookie->volume->debug_id;
+		    __entry->ref	= refcount_read(&cookie->ref);
+		    __entry->n_active	= atomic_read(&cookie->n_active);
+		    __entry->flags	= cookie->flags;
+		    __entry->retire	= retire;
+			   ),
+
+	    TP_printk("c=%08x V=%08x r=%d U=%d f=%02x rt=%u",
+		      __entry->cookie, __entry->volume, __entry->ref,
+		      __entry->n_active, __entry->flags, __entry->retire)
+	    );
+
 #endif /* _TRACE_FSCACHE_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 23e12e285a6ab7320a8bceead29cfe13190a6e3c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 15:00:26 +0100
Subject: fscache: Implement cache-level access helpers

Add a pair of functions to pin/unpin a cache that we're wanting to do a
high-level access to (such as creating or removing a volume):

	bool fscache_begin_cache_access(struct fscache_cache *cache,
					enum fscache_access_trace why);
	void fscache_end_cache_access(struct fscache_cache *cache,
				      enum fscache_access_trace why);

The way the access gate works/will work is:

 (1) If the cache tests as not live (state is not FSCACHE_CACHE_IS_ACTIVE),
     then we return false to indicate access was not permitted.

 (2) If the cache tests as live, then we increment the n_accesses count and
     then recheck the liveness, ending the access if it ceased to be live.

 (3) When we end the access, we decrement n_accesses and wake up the any
     waiters if it reaches 0.

 (4) Whilst the cache is caching, n_accesses is kept artificially
     incremented to prevent wakeups from happening.

 (5) When the cache is taken offline, the state is changed to prevent new
     accesses, n_accesses is decremented and we wait for n_accesses to
     become 0.

Note that some of this is implemented in a later patch.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819593239.215744.7537428720603638088.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906893368.143852.14164004598465617981.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967093977.1823006.6967886507023056409.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021499995.640689.18286203753480287850.stgit@warthog.procyon.org.uk/ # v4
---
 include/trace/events/fscache.h | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index 9286e1c4b2ac..734966bc49e1 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -62,6 +62,12 @@ enum fscache_cookie_trace {
 	fscache_cookie_see_work,
 };
 
+enum fscache_access_trace {
+	fscache_access_cache_pin,
+	fscache_access_cache_unpin,
+	fscache_access_unlive,
+};
+
 #endif
 
 /*
@@ -107,6 +113,11 @@ enum fscache_cookie_trace {
 	EM(fscache_cookie_see_withdraw,		"-   x-wth")		\
 	E_(fscache_cookie_see_work,		"-   work ")
 
+#define fscache_access_traces		\
+	EM(fscache_access_cache_pin,		"PIN   cache  ")	\
+	EM(fscache_access_cache_unpin,		"UNPIN cache  ")	\
+	E_(fscache_access_unlive,		"END   unlive ")
+
 /*
  * Export enum symbols via userspace.
  */
@@ -118,6 +129,7 @@ enum fscache_cookie_trace {
 fscache_cache_traces;
 fscache_volume_traces;
 fscache_cookie_traces;
+fscache_access_traces;
 
 /*
  * Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -204,6 +216,35 @@ TRACE_EVENT(fscache_cookie,
 		      __entry->ref)
 	    );
 
+TRACE_EVENT(fscache_access_cache,
+	    TP_PROTO(unsigned int cache_debug_id,
+		     int ref,
+		     int n_accesses,
+		     enum fscache_access_trace why),
+
+	    TP_ARGS(cache_debug_id, ref, n_accesses, why),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		cache		)
+		    __field(int,			ref		)
+		    __field(int,			n_accesses	)
+		    __field(enum fscache_access_trace,	why		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cache	= cache_debug_id;
+		    __entry->ref	= ref;
+		    __entry->n_accesses	= n_accesses;
+		    __entry->why	= why;
+			   ),
+
+	    TP_printk("C=%08x %s r=%d a=%d",
+		      __entry->cache,
+		      __print_symbolic(__entry->why, fscache_access_traces),
+		      __entry->ref,
+		      __entry->n_accesses)
+	    );
+
 TRACE_EVENT(fscache_acquire,
 	    TP_PROTO(struct fscache_cookie *cookie),
 
-- 
cgit v1.2.3


From e6acd3299badbfb5fb0231d42481d4f5dedf5599 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 15:26:17 +0100
Subject: fscache: Implement volume-level access helpers

Add a pair of helper functions to manage access to a volume, pinning the
volume in place for the duration to prevent cache withdrawal from removing
it:

	bool fscache_begin_volume_access(struct fscache_volume *volume,
					 enum fscache_access_trace why);
	void fscache_end_volume_access(struct fscache_volume *volume,
				       enum fscache_access_trace why);

The way the access gate on the volume works/will work is:

  (1) If the cache tests as not live (state is not FSCACHE_CACHE_IS_ACTIVE),
      then we return false to indicate access was not permitted.

  (2) If the cache tests as live, then we increment the volume's n_accesses
      count and then recheck the cache liveness, ending the access if it
      ceased to be live.

  (3) When we end the access, we decrement the volume's n_accesses and wake
      up the any waiters if it reaches 0.

  (4) Whilst the cache is caching, the volume's n_accesses is kept
      artificially incremented to prevent wakeups from happening.

  (5) When the cache is taken offline, the state is changed to prevent new
      accesses, the volume's n_accesses is decremented and we wait for it to
      become 0.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819594158.215744.8285859817391683254.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906894315.143852.5454793807544710479.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967095028.1823006.9173132503876627466.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021501546.640689.9631510472149608443.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache-cache.h  |  4 ++++
 include/trace/events/fscache.h | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index c4355b888c91..fbbd8a2afe12 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -53,6 +53,10 @@ extern struct rw_semaphore fscache_addremove_sem;
 extern struct fscache_cache *fscache_acquire_cache(const char *name);
 extern void fscache_relinquish_cache(struct fscache_cache *cache);
 
+extern void fscache_end_volume_access(struct fscache_volume *volume,
+				      struct fscache_cookie *cookie,
+				      enum fscache_access_trace why);
+
 extern struct fscache_cookie *fscache_get_cookie(struct fscache_cookie *cookie,
 						 enum fscache_cookie_trace where);
 extern void fscache_put_cookie(struct fscache_cookie *cookie,
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index 734966bc49e1..4f40cfa52469 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -43,6 +43,7 @@ enum fscache_volume_trace {
 	fscache_volume_put_relinquish,
 	fscache_volume_see_create_work,
 	fscache_volume_see_hash_wake,
+	fscache_volume_wait_create_work,
 };
 
 enum fscache_cookie_trace {
@@ -245,6 +246,39 @@ TRACE_EVENT(fscache_access_cache,
 		      __entry->n_accesses)
 	    );
 
+TRACE_EVENT(fscache_access_volume,
+	    TP_PROTO(unsigned int volume_debug_id,
+		     unsigned int cookie_debug_id,
+		     int ref,
+		     int n_accesses,
+		     enum fscache_access_trace why),
+
+	    TP_ARGS(volume_debug_id, cookie_debug_id, ref, n_accesses, why),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		volume		)
+		    __field(unsigned int,		cookie		)
+		    __field(int,			ref		)
+		    __field(int,			n_accesses	)
+		    __field(enum fscache_access_trace,	why		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->volume	= volume_debug_id;
+		    __entry->cookie	= cookie_debug_id;
+		    __entry->ref	= ref;
+		    __entry->n_accesses	= n_accesses;
+		    __entry->why	= why;
+			   ),
+
+	    TP_printk("V=%08x c=%08x %s r=%d a=%d",
+		      __entry->volume,
+		      __entry->cookie,
+		      __print_symbolic(__entry->why, fscache_access_traces),
+		      __entry->ref,
+		      __entry->n_accesses)
+	    );
+
 TRACE_EVENT(fscache_acquire,
 	    TP_PROTO(struct fscache_cookie *cookie),
 
-- 
cgit v1.2.3


From a7733fb632722a2f085f9324f14783effe268ed3 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 15:53:34 +0100
Subject: fscache: Implement cookie-level access helpers

Add a number of helper functions to manage access to a cookie, pinning the
cache object in place for the duration to prevent cache withdrawal from
removing it:

 (1) void fscache_init_access_gate(struct fscache_cookie *cookie);

     This function initialises the access count when a cache binds to a
     cookie.  An extra ref is taken on the access count to prevent wakeups
     while the cache is active.  We're only interested in the wakeup when a
     cookie is being withdrawn and we're waiting for it to quiesce - at
     which point the counter will be decremented before the wait.

     The FSCACHE_COOKIE_NACC_ELEVATED flag is set on the cookie to keep
     track of the extra ref in order to handle a race between
     relinquishment and withdrawal both trying to drop the extra ref.

 (2) bool fscache_begin_cookie_access(struct fscache_cookie *cookie,
				      enum fscache_access_trace why);

     This function attempts to begin access upon a cookie, pinning it in
     place if it's cached.  If successful, it returns true and leaves a the
     access count incremented.

 (3) void fscache_end_cookie_access(struct fscache_cookie *cookie,
				    enum fscache_access_trace why);

     This function drops the access count obtained by (2), permitting
     object withdrawal to take place when it reaches zero.

A tracepoint is provided to track changes to the access counter on a
cookie.

Changes
=======
ver #2:
 - Don't hold n_accesses elevated whilst cache is bound to a cookie, but
   rather add a flag that prevents the state machine from being queued when
   n_accesses reaches 0.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819595085.215744.1706073049250505427.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906895313.143852.10141619544149102193.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967095980.1823006.1133648159424418877.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021503063.640689.8870918985269528670.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache-cache.h  |  2 ++
 include/trace/events/fscache.h | 29 +++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index fbbd8a2afe12..66624407ba84 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -61,6 +61,8 @@ extern struct fscache_cookie *fscache_get_cookie(struct fscache_cookie *cookie,
 						 enum fscache_cookie_trace where);
 extern void fscache_put_cookie(struct fscache_cookie *cookie,
 			       enum fscache_cookie_trace where);
+extern void fscache_end_cookie_access(struct fscache_cookie *cookie,
+				      enum fscache_access_trace why);
 extern void fscache_set_cookie_state(struct fscache_cookie *cookie,
 				     enum fscache_cookie_state state);
 
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index 4f40cfa52469..b1a962adfd16 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -279,6 +279,35 @@ TRACE_EVENT(fscache_access_volume,
 		      __entry->n_accesses)
 	    );
 
+TRACE_EVENT(fscache_access,
+	    TP_PROTO(unsigned int cookie_debug_id,
+		     int ref,
+		     int n_accesses,
+		     enum fscache_access_trace why),
+
+	    TP_ARGS(cookie_debug_id, ref, n_accesses, why),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		cookie		)
+		    __field(int,			ref		)
+		    __field(int,			n_accesses	)
+		    __field(enum fscache_access_trace,	why		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie	= cookie_debug_id;
+		    __entry->ref	= ref;
+		    __entry->n_accesses	= n_accesses;
+		    __entry->why	= why;
+			   ),
+
+	    TP_printk("c=%08x %s r=%d a=%d",
+		      __entry->cookie,
+		      __print_symbolic(__entry->why, fscache_access_traces),
+		      __entry->ref,
+		      __entry->n_accesses)
+	    );
+
 TRACE_EVENT(fscache_acquire,
 	    TP_PROTO(struct fscache_cookie *cookie),
 
-- 
cgit v1.2.3


From 2e0c76aee25f33c482abda6224bd87732359354d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 15:00:26 +0100
Subject: fscache: Implement functions add/remove a cache

Implement functions to allow the cache backend to add or remove a cache:

 (1) Declare a cache to be live:

	int fscache_add_cache(struct fscache_cache *cache,
			      const struct fscache_cache_ops *ops,
			      void *cache_priv);

     Take a previously acquired cache cookie, set the operations table and
     private data and mark the cache open for access.

 (2) Withdraw a cache from service:

	void fscache_withdraw_cache(struct fscache_cache *cache);

     This marks the cache as withdrawn and thus prevents further
     cache-level and volume-level accesses.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819596022.215744.8799712491432238827.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906896599.143852.17049208999019262884.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967097870.1823006.3470041000971522030.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021505541.640689.1819714759326331054.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache-cache.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 66624407ba84..f78add6e7823 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -33,6 +33,7 @@ enum fscache_cache_state {
  * Cache cookie.
  */
 struct fscache_cache {
+	const struct fscache_cache_ops *ops;
 	struct list_head	cache_link;	/* Link in cache list */
 	void			*cache_priv;	/* Private cache data (or NULL) */
 	refcount_t		ref;
@@ -44,6 +45,14 @@ struct fscache_cache {
 	char			*name;
 };
 
+/*
+ * cache operations
+ */
+struct fscache_cache_ops {
+	/* name of cache provider */
+	const char *name;
+};
+
 extern struct workqueue_struct *fscache_wq;
 
 /*
@@ -52,6 +61,10 @@ extern struct workqueue_struct *fscache_wq;
 extern struct rw_semaphore fscache_addremove_sem;
 extern struct fscache_cache *fscache_acquire_cache(const char *name);
 extern void fscache_relinquish_cache(struct fscache_cache *cache);
+extern int fscache_add_cache(struct fscache_cache *cache,
+			     const struct fscache_cache_ops *ops,
+			     void *cache_priv);
+extern void fscache_withdraw_cache(struct fscache_cache *cache);
 
 extern void fscache_end_volume_access(struct fscache_volume *volume,
 				      struct fscache_cookie *cookie,
-- 
cgit v1.2.3


From bfa22da3ed652aa15acd4246fa13a0de6dbe4a59 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 15:26:17 +0100
Subject: fscache: Provide and use cache methods to lookup/create/free a volume

Add cache methods to lookup, create and remove a volume.

Looking up or creating the volume requires the cache pinning for access;
freeing the volume requires the volume pinning for access.  The
->acquire_volume() method is used to ask the cache backend to lookup and,
if necessary, create a volume; the ->free_volume() method is used to free
the resources for a volume.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819597821.215744.5225318658134989949.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906898645.143852.8537799955945956818.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967099771.1823006.1455197910571061835.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021507345.640689.4073511598838843040.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache-cache.h  |  7 +++++++
 include/trace/events/fscache.h | 11 ++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index f78add6e7823..a10b66ca3544 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -51,6 +51,12 @@ struct fscache_cache {
 struct fscache_cache_ops {
 	/* name of cache provider */
 	const char *name;
+
+	/* Acquire a volume */
+	void (*acquire_volume)(struct fscache_volume *volume);
+
+	/* Free the cache's data attached to a volume */
+	void (*free_volume)(struct fscache_volume *volume);
 };
 
 extern struct workqueue_struct *fscache_wq;
@@ -65,6 +71,7 @@ extern int fscache_add_cache(struct fscache_cache *cache,
 			     const struct fscache_cache_ops *ops,
 			     void *cache_priv);
 extern void fscache_withdraw_cache(struct fscache_cache *cache);
+extern void fscache_withdraw_volume(struct fscache_volume *volume);
 
 extern void fscache_end_volume_access(struct fscache_volume *volume,
 				      struct fscache_cookie *cookie,
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index b1a962adfd16..1d576bd8112e 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -64,8 +64,12 @@ enum fscache_cookie_trace {
 };
 
 enum fscache_access_trace {
+	fscache_access_acquire_volume,
+	fscache_access_acquire_volume_end,
 	fscache_access_cache_pin,
 	fscache_access_cache_unpin,
+	fscache_access_relinquish_volume,
+	fscache_access_relinquish_volume_end,
 	fscache_access_unlive,
 };
 
@@ -96,7 +100,8 @@ enum fscache_access_trace {
 	EM(fscache_volume_put_hash_collision,	"PUT hcoll")		\
 	EM(fscache_volume_put_relinquish,	"PUT relnq")		\
 	EM(fscache_volume_see_create_work,	"SEE creat")		\
-	E_(fscache_volume_see_hash_wake,	"SEE hwake")
+	EM(fscache_volume_see_hash_wake,	"SEE hwake")		\
+	E_(fscache_volume_wait_create_work,	"WAIT crea")
 
 #define fscache_cookie_traces						\
 	EM(fscache_cookie_collision,		"*COLLIDE*")		\
@@ -115,8 +120,12 @@ enum fscache_access_trace {
 	E_(fscache_cookie_see_work,		"-   work ")
 
 #define fscache_access_traces		\
+	EM(fscache_access_acquire_volume,	"BEGIN acq_vol")	\
+	EM(fscache_access_acquire_volume_end,	"END   acq_vol")	\
 	EM(fscache_access_cache_pin,		"PIN   cache  ")	\
 	EM(fscache_access_cache_unpin,		"UNPIN cache  ")	\
+	EM(fscache_access_relinquish_volume,	"BEGIN rlq_vol")	\
+	EM(fscache_access_relinquish_volume_end,"END   rlq_vol")	\
 	E_(fscache_access_unlive,		"END   unlive ")
 
 /*
-- 
cgit v1.2.3


From 29f18e79fe7c5f8011befeda9be6b220a350f947 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 15:00:26 +0100
Subject: fscache: Add a function for a cache backend to note an I/O error

Add a function to the backend API to note an I/O error in a cache.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819598741.215744.891281275151382095.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906901316.143852.15225412215771586528.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967100721.1823006.16435671567428949398.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021508840.640689.11902836226570620424.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache-cache.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index a10b66ca3544..936ef731bbc7 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -73,6 +73,8 @@ extern int fscache_add_cache(struct fscache_cache *cache,
 extern void fscache_withdraw_cache(struct fscache_cache *cache);
 extern void fscache_withdraw_volume(struct fscache_volume *volume);
 
+extern void fscache_io_error(struct fscache_cache *cache);
+
 extern void fscache_end_volume_access(struct fscache_volume *volume,
 				      struct fscache_cookie *cookie,
 				      enum fscache_access_trace why);
-- 
cgit v1.2.3


From 5d00e426f95e7ea036fec2a0aceb3f71d6dbdf92 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 15:53:34 +0100
Subject: fscache: Implement simple cookie state machine

Implement a very simple cookie state machine to handle lookup,
invalidation, withdrawal, relinquishment and, to be added later, commit on
LRU discard.

Three cache methods are provided: ->lookup_cookie() to look up and, if
necessary, create a data storage object; ->withdraw_cookie() to free the
resources associated with that object and potentially delete it; and
->prepare_to_write(), to do prepare for changes to the cached data to be
modified locally.

Changes
=======
ver #3:
 - Fix a race between LRU discard and relinquishment whereby the former
   would override the latter and thus the latter would never happen[1].

ver #2:
 - Don't hold n_accesses elevated whilst cache is bound to a cookie, but
   rather add a flag that prevents the state machine from being queued when
   n_accesses reaches 0.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/599331.1639410068@warthog.procyon.org.uk/ [1]
Link: https://lore.kernel.org/r/163819599657.215744.15799615296912341745.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906903925.143852.1805855338154353867.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967105456.1823006.14730395299835841776.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021510706.640689.7961423370243272583.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache-cache.h  | 27 +++++++++++++++++++++++++--
 include/trace/events/fscache.h |  4 ++++
 2 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 936ef731bbc7..ae6a75976450 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -57,6 +57,15 @@ struct fscache_cache_ops {
 
 	/* Free the cache's data attached to a volume */
 	void (*free_volume)(struct fscache_volume *volume);
+
+	/* Look up a cookie in the cache */
+	bool (*lookup_cookie)(struct fscache_cookie *cookie);
+
+	/* Withdraw an object without any cookie access counts held */
+	void (*withdraw_cookie)(struct fscache_cookie *cookie);
+
+	/* Prepare to write to a live cache object */
+	void (*prepare_to_write)(struct fscache_cookie *cookie);
 };
 
 extern struct workqueue_struct *fscache_wq;
@@ -72,6 +81,7 @@ extern int fscache_add_cache(struct fscache_cache *cache,
 			     void *cache_priv);
 extern void fscache_withdraw_cache(struct fscache_cache *cache);
 extern void fscache_withdraw_volume(struct fscache_volume *volume);
+extern void fscache_withdraw_cookie(struct fscache_cookie *cookie);
 
 extern void fscache_io_error(struct fscache_cache *cache);
 
@@ -85,8 +95,21 @@ extern void fscache_put_cookie(struct fscache_cookie *cookie,
 			       enum fscache_cookie_trace where);
 extern void fscache_end_cookie_access(struct fscache_cookie *cookie,
 				      enum fscache_access_trace why);
-extern void fscache_set_cookie_state(struct fscache_cookie *cookie,
-				     enum fscache_cookie_state state);
+extern void fscache_cookie_lookup_negative(struct fscache_cookie *cookie);
+extern void fscache_caching_failed(struct fscache_cookie *cookie);
+
+/**
+ * fscache_cookie_state - Read the state of a cookie
+ * @cookie: The cookie to query
+ *
+ * Get the state of a cookie, imposing an ordering between the cookie contents
+ * and the state value.  Paired with fscache_set_cookie_state().
+ */
+static inline
+enum fscache_cookie_state fscache_cookie_state(struct fscache_cookie *cookie)
+{
+	return smp_load_acquire(&cookie->state);
+}
 
 /**
  * fscache_get_key - Get a pointer to the cookie key
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index 1d576bd8112e..030c97bb9c8b 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -68,6 +68,8 @@ enum fscache_access_trace {
 	fscache_access_acquire_volume_end,
 	fscache_access_cache_pin,
 	fscache_access_cache_unpin,
+	fscache_access_lookup_cookie_end,
+	fscache_access_lookup_cookie_end_failed,
 	fscache_access_relinquish_volume,
 	fscache_access_relinquish_volume_end,
 	fscache_access_unlive,
@@ -124,6 +126,8 @@ enum fscache_access_trace {
 	EM(fscache_access_acquire_volume_end,	"END   acq_vol")	\
 	EM(fscache_access_cache_pin,		"PIN   cache  ")	\
 	EM(fscache_access_cache_unpin,		"UNPIN cache  ")	\
+	EM(fscache_access_lookup_cookie_end,	"END   lookup ")	\
+	EM(fscache_access_lookup_cookie_end_failed,"END   lookupf")	\
 	EM(fscache_access_relinquish_volume,	"BEGIN rlq_vol")	\
 	EM(fscache_access_relinquish_volume_end,"END   rlq_vol")	\
 	E_(fscache_access_unlive,		"END   unlive ")
-- 
cgit v1.2.3


From 12bb21a29c19aae50cfad4e2bb5c943108f34a7d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 15:53:34 +0100
Subject: fscache: Implement cookie user counting and resource pinning

Provide a pair of functions to count the number of users of a cookie (open
files, writeback, invalidation, resizing, reads, writes), to obtain and pin
resources for the cookie and to prevent culling for the whilst there are
users.

The first function marks a cookie as being in use:

	void fscache_use_cookie(struct fscache_cookie *cookie,
				bool will_modify);

The caller should indicate the cookie to use and whether or not the caller
is in a context that may modify the cookie (e.g. a file open O_RDWR).

If the cookie is not already resourced, fscache will ask the cache backend
in the background to do whatever it needs to look up, create or otherwise
obtain the resources necessary to access data.  This is pinned to the
cookie and may not be culled, though it may be withdrawn if the cache as a
whole is withdrawn.

The second function removes the in-use mark from a cookie and, optionally,
updates the coherency data:

	void fscache_unuse_cookie(struct fscache_cookie *cookie,
				  const void *aux_data,
				  const loff_t *object_size);

If non-NULL, the aux_data buffer and/or the object_size will be saved into
the cookie and will be set on the backing store when the object is
committed.

If this removes the last usage on a cookie, the cookie is placed onto an
LRU list from which it will be removed and closed after a couple of seconds
if it doesn't get reused.  This prevents resource overload in the cache -
in particular it prevents it from holding too many files open.

Changes
=======
ver #2:
 - Fix fscache_unuse_cookie() to use atomic_dec_and_lock() to avoid a
   potential race if the cookie gets reused before it completes the
   unusement.
 - Added missing transition to LRU_DISCARDING state.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819600612.215744.13678350304176542741.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906907567.143852.16979631199380722019.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967106467.1823006.6790864931048582667.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021511674.640689.10084988363699111860.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache.h        | 82 +++++++++++++++++++++++++++++++++++++++++-
 include/trace/events/fscache.h | 12 +++++++
 2 files changed, 93 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 4450d17c11e8..e6c321e5bf73 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -22,12 +22,14 @@
 #define fscache_available() (1)
 #define fscache_volume_valid(volume) (volume)
 #define fscache_cookie_valid(cookie) (cookie)
-#define fscache_cookie_enabled(cookie) (cookie)
+#define fscache_resources_valid(cres) ((cres)->cache_priv)
+#define fscache_cookie_enabled(cookie) (cookie && !test_bit(FSCACHE_COOKIE_DISABLED, &cookie->flags))
 #else
 #define __fscache_available (0)
 #define fscache_available() (0)
 #define fscache_volume_valid(volume) (0)
 #define fscache_cookie_valid(cookie) (0)
+#define fscache_resources_valid(cres) (false)
 #define fscache_cookie_enabled(cookie) (0)
 #endif
 
@@ -46,6 +48,7 @@ enum fscache_cookie_state {
 	FSCACHE_COOKIE_STATE_CREATING,		/* The cache object is being created */
 	FSCACHE_COOKIE_STATE_ACTIVE,		/* The cache is active, readable and writable */
 	FSCACHE_COOKIE_STATE_FAILED,		/* The cache failed, withdraw to clear */
+	FSCACHE_COOKIE_STATE_LRU_DISCARDING,	/* The cookie is being discarded by the LRU */
 	FSCACHE_COOKIE_STATE_WITHDRAWING,	/* The cookie is being withdrawn */
 	FSCACHE_COOKIE_STATE_RELINQUISHING,	/* The cookie is being relinquished */
 	FSCACHE_COOKIE_STATE_DROPPED,		/* The cookie has been dropped */
@@ -147,6 +150,8 @@ extern struct fscache_cookie *__fscache_acquire_cookie(
 	const void *, size_t,
 	const void *, size_t,
 	loff_t);
+extern void __fscache_use_cookie(struct fscache_cookie *, bool);
+extern void __fscache_unuse_cookie(struct fscache_cookie *, const void *, const loff_t *);
 extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool);
 
 /**
@@ -228,6 +233,39 @@ struct fscache_cookie *fscache_acquire_cookie(struct fscache_volume *volume,
 					object_size);
 }
 
+/**
+ * fscache_use_cookie - Request usage of cookie attached to an object
+ * @object: Object description
+ * @will_modify: If cache is expected to be modified locally
+ *
+ * Request usage of the cookie attached to an object.  The caller should tell
+ * the cache if the object's contents are about to be modified locally and then
+ * the cache can apply the policy that has been set to handle this case.
+ */
+static inline void fscache_use_cookie(struct fscache_cookie *cookie,
+				      bool will_modify)
+{
+	if (fscache_cookie_valid(cookie))
+		__fscache_use_cookie(cookie, will_modify);
+}
+
+/**
+ * fscache_unuse_cookie - Cease usage of cookie attached to an object
+ * @object: Object description
+ * @aux_data: Updated auxiliary data (or NULL)
+ * @object_size: Revised size of the object (or NULL)
+ *
+ * Cease usage of the cookie attached to an object.  When the users count
+ * reaches zero then the cookie relinquishment will be permitted to proceed.
+ */
+static inline void fscache_unuse_cookie(struct fscache_cookie *cookie,
+					const void *aux_data,
+					const loff_t *object_size)
+{
+	if (fscache_cookie_valid(cookie))
+		__fscache_unuse_cookie(cookie, aux_data, object_size);
+}
+
 /**
  * fscache_relinquish_cookie - Return the cookie to the cache, maybe discarding
  * it
@@ -247,4 +285,46 @@ void fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
 		__fscache_relinquish_cookie(cookie, retire);
 }
 
+/*
+ * Find the auxiliary data on a cookie.
+ */
+static inline void *fscache_get_aux(struct fscache_cookie *cookie)
+{
+	if (cookie->aux_len <= sizeof(cookie->inline_aux))
+		return cookie->inline_aux;
+	else
+		return cookie->aux;
+}
+
+/*
+ * Update the auxiliary data on a cookie.
+ */
+static inline
+void fscache_update_aux(struct fscache_cookie *cookie,
+			const void *aux_data, const loff_t *object_size)
+{
+	void *p = fscache_get_aux(cookie);
+
+	if (aux_data && p)
+		memcpy(p, aux_data, cookie->aux_len);
+	if (object_size)
+		cookie->object_size = *object_size;
+}
+
+#ifdef CONFIG_FSCACHE_STATS
+extern atomic_t fscache_n_updates;
+#endif
+
+static inline
+void __fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data,
+			     const loff_t *object_size)
+{
+#ifdef CONFIG_FSCACHE_STATS
+	atomic_inc(&fscache_n_updates);
+#endif
+	fscache_update_aux(cookie, aux_data, object_size);
+	smp_wmb();
+	set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &cookie->flags);
+}
+
 #endif /* _LINUX_FSCACHE_H */
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index 030c97bb9c8b..b0409b1fad23 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -51,13 +51,18 @@ enum fscache_cookie_trace {
 	fscache_cookie_discard,
 	fscache_cookie_get_end_access,
 	fscache_cookie_get_hash_collision,
+	fscache_cookie_get_lru,
+	fscache_cookie_get_use_work,
 	fscache_cookie_new_acquire,
 	fscache_cookie_put_hash_collision,
+	fscache_cookie_put_lru,
 	fscache_cookie_put_over_queued,
 	fscache_cookie_put_relinquish,
 	fscache_cookie_put_withdrawn,
 	fscache_cookie_put_work,
 	fscache_cookie_see_active,
+	fscache_cookie_see_lru_discard,
+	fscache_cookie_see_lru_do_one,
 	fscache_cookie_see_relinquish,
 	fscache_cookie_see_withdraw,
 	fscache_cookie_see_work,
@@ -68,6 +73,7 @@ enum fscache_access_trace {
 	fscache_access_acquire_volume_end,
 	fscache_access_cache_pin,
 	fscache_access_cache_unpin,
+	fscache_access_lookup_cookie,
 	fscache_access_lookup_cookie_end,
 	fscache_access_lookup_cookie_end_failed,
 	fscache_access_relinquish_volume,
@@ -110,13 +116,18 @@ enum fscache_access_trace {
 	EM(fscache_cookie_discard,		"DISCARD  ")		\
 	EM(fscache_cookie_get_hash_collision,	"GET hcoll")		\
 	EM(fscache_cookie_get_end_access,	"GQ  endac")		\
+	EM(fscache_cookie_get_lru,		"GET lru  ")		\
+	EM(fscache_cookie_get_use_work,		"GQ  use  ")		\
 	EM(fscache_cookie_new_acquire,		"NEW acq  ")		\
 	EM(fscache_cookie_put_hash_collision,	"PUT hcoll")		\
+	EM(fscache_cookie_put_lru,		"PUT lru  ")		\
 	EM(fscache_cookie_put_over_queued,	"PQ  overq")		\
 	EM(fscache_cookie_put_relinquish,	"PUT relnq")		\
 	EM(fscache_cookie_put_withdrawn,	"PUT wthdn")		\
 	EM(fscache_cookie_put_work,		"PQ  work ")		\
 	EM(fscache_cookie_see_active,		"-   activ")		\
+	EM(fscache_cookie_see_lru_discard,	"-   x-lru")		\
+	EM(fscache_cookie_see_lru_do_one,	"-   lrudo")		\
 	EM(fscache_cookie_see_relinquish,	"-   x-rlq")		\
 	EM(fscache_cookie_see_withdraw,		"-   x-wth")		\
 	E_(fscache_cookie_see_work,		"-   work ")
@@ -126,6 +137,7 @@ enum fscache_access_trace {
 	EM(fscache_access_acquire_volume_end,	"END   acq_vol")	\
 	EM(fscache_access_cache_pin,		"PIN   cache  ")	\
 	EM(fscache_access_cache_unpin,		"UNPIN cache  ")	\
+	EM(fscache_access_lookup_cookie,	"BEGIN lookup ")	\
 	EM(fscache_access_lookup_cookie_end,	"END   lookup ")	\
 	EM(fscache_access_lookup_cookie_end_failed,"END   lookupf")	\
 	EM(fscache_access_relinquish_volume,	"BEGIN rlq_vol")	\
-- 
cgit v1.2.3


From d24af13e2e2358a602740c7817ea90da43d3e740 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 15:53:34 +0100
Subject: fscache: Implement cookie invalidation

Add a function to invalidate the cache behind a cookie:

	void fscache_invalidate(struct fscache_cookie *cookie,
				const void *aux_data,
				loff_t size,
				unsigned int flags)

This causes any cached data for the specified cookie to be discarded.  If
the cookie is marked as being in use, a new cache object will be created if
possible and future I/O will use that instead.  In-flight I/O should be
abandoned (writes) or reconsidered (reads).  Each time it is called
cookie->inval_counter is incremented and this can be used to detect
invalidation at the end of an I/O operation.

The coherency data attached to the cookie can be updated and the cookie
size should be reset.  One flag is available, FSCACHE_INVAL_DIO_WRITE,
which should be used to indicate invalidation due to a DIO write on a
file.  This will temporarily disable caching for this cookie.

Changes
=======
ver #2:
 - Should only change to inval state if can get access to cache.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819602231.215744.11206598147269491575.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906909707.143852.18056070560477964891.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967107447.1823006.5945029409592119962.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021512640.640689.11418616313147754172.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache-cache.h  |  4 ++++
 include/linux/fscache.h        | 31 +++++++++++++++++++++++++++++++
 include/linux/netfs.h          |  1 +
 include/trace/events/fscache.h | 25 +++++++++++++++++++++++++
 4 files changed, 61 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index ae6a75976450..1ad56bfd9d72 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -64,6 +64,9 @@ struct fscache_cache_ops {
 	/* Withdraw an object without any cookie access counts held */
 	void (*withdraw_cookie)(struct fscache_cookie *cookie);
 
+	/* Invalidate an object */
+	bool (*invalidate_cookie)(struct fscache_cookie *cookie);
+
 	/* Prepare to write to a live cache object */
 	void (*prepare_to_write)(struct fscache_cookie *cookie);
 };
@@ -96,6 +99,7 @@ extern void fscache_put_cookie(struct fscache_cookie *cookie,
 extern void fscache_end_cookie_access(struct fscache_cookie *cookie,
 				      enum fscache_access_trace why);
 extern void fscache_cookie_lookup_negative(struct fscache_cookie *cookie);
+extern void fscache_resume_after_invalidation(struct fscache_cookie *cookie);
 extern void fscache_caching_failed(struct fscache_cookie *cookie);
 
 /**
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index e6c321e5bf73..0f36d1fac237 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -39,6 +39,8 @@ struct fscache_cookie;
 #define FSCACHE_ADV_WRITE_CACHE		0x00 /* Do cache if written to locally */
 #define FSCACHE_ADV_WRITE_NOCACHE	0x02 /* Don't cache if written to locally */
 
+#define FSCACHE_INVAL_DIO_WRITE		0x01 /* Invalidate due to DIO write */
+
 /*
  * Data object state.
  */
@@ -47,6 +49,7 @@ enum fscache_cookie_state {
 	FSCACHE_COOKIE_STATE_LOOKING_UP,	/* The cache object is being looked up */
 	FSCACHE_COOKIE_STATE_CREATING,		/* The cache object is being created */
 	FSCACHE_COOKIE_STATE_ACTIVE,		/* The cache is active, readable and writable */
+	FSCACHE_COOKIE_STATE_INVALIDATING,	/* The cache is being invalidated */
 	FSCACHE_COOKIE_STATE_FAILED,		/* The cache failed, withdraw to clear */
 	FSCACHE_COOKIE_STATE_LRU_DISCARDING,	/* The cookie is being discarded by the LRU */
 	FSCACHE_COOKIE_STATE_WITHDRAWING,	/* The cookie is being withdrawn */
@@ -153,6 +156,7 @@ extern struct fscache_cookie *__fscache_acquire_cookie(
 extern void __fscache_use_cookie(struct fscache_cookie *, bool);
 extern void __fscache_unuse_cookie(struct fscache_cookie *, const void *, const loff_t *);
 extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool);
+extern void __fscache_invalidate(struct fscache_cookie *, const void *, loff_t, unsigned int);
 
 /**
  * fscache_acquire_volume - Register a volume as desiring caching services
@@ -327,4 +331,31 @@ void __fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data
 	set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &cookie->flags);
 }
 
+/**
+ * fscache_invalidate - Notify cache that an object needs invalidation
+ * @cookie: The cookie representing the cache object
+ * @aux_data: The updated auxiliary data for the cookie (may be NULL)
+ * @size: The revised size of the object.
+ * @flags: Invalidation flags (FSCACHE_INVAL_*)
+ *
+ * Notify the cache that an object is needs to be invalidated and that it
+ * should abort any retrievals or stores it is doing on the cache.  This
+ * increments inval_counter on the cookie which can be used by the caller to
+ * reconsider I/O requests as they complete.
+ *
+ * If @flags has FSCACHE_INVAL_DIO_WRITE set, this indicates that this is due
+ * to a direct I/O write and will cause caching to be disabled on this cookie
+ * until it is completely unused.
+ *
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
+ * description.
+ */
+static inline
+void fscache_invalidate(struct fscache_cookie *cookie,
+			const void *aux_data, loff_t size, unsigned int flags)
+{
+	if (fscache_cookie_enabled(cookie))
+		__fscache_invalidate(cookie, aux_data, size, flags);
+}
+
 #endif /* _LINUX_FSCACHE_H */
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 1ea22fc48818..5a46fde65759 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -124,6 +124,7 @@ struct netfs_cache_resources {
 	void				*cache_priv;
 	void				*cache_priv2;
 	unsigned int			debug_id;	/* Cookie debug ID */
+	unsigned int			inval_counter;	/* object->inval_counter at begin_op */
 };
 
 /*
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index b0409b1fad23..294792881434 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -51,6 +51,7 @@ enum fscache_cookie_trace {
 	fscache_cookie_discard,
 	fscache_cookie_get_end_access,
 	fscache_cookie_get_hash_collision,
+	fscache_cookie_get_inval_work,
 	fscache_cookie_get_lru,
 	fscache_cookie_get_use_work,
 	fscache_cookie_new_acquire,
@@ -73,6 +74,8 @@ enum fscache_access_trace {
 	fscache_access_acquire_volume_end,
 	fscache_access_cache_pin,
 	fscache_access_cache_unpin,
+	fscache_access_invalidate_cookie,
+	fscache_access_invalidate_cookie_end,
 	fscache_access_lookup_cookie,
 	fscache_access_lookup_cookie_end,
 	fscache_access_lookup_cookie_end_failed,
@@ -116,6 +119,7 @@ enum fscache_access_trace {
 	EM(fscache_cookie_discard,		"DISCARD  ")		\
 	EM(fscache_cookie_get_hash_collision,	"GET hcoll")		\
 	EM(fscache_cookie_get_end_access,	"GQ  endac")		\
+	EM(fscache_cookie_get_inval_work,	"GQ  inval")		\
 	EM(fscache_cookie_get_lru,		"GET lru  ")		\
 	EM(fscache_cookie_get_use_work,		"GQ  use  ")		\
 	EM(fscache_cookie_new_acquire,		"NEW acq  ")		\
@@ -137,6 +141,8 @@ enum fscache_access_trace {
 	EM(fscache_access_acquire_volume_end,	"END   acq_vol")	\
 	EM(fscache_access_cache_pin,		"PIN   cache  ")	\
 	EM(fscache_access_cache_unpin,		"UNPIN cache  ")	\
+	EM(fscache_access_invalidate_cookie,	"BEGIN inval  ")	\
+	EM(fscache_access_invalidate_cookie_end,"END   inval  ")	\
 	EM(fscache_access_lookup_cookie,	"BEGIN lookup ")	\
 	EM(fscache_access_lookup_cookie_end,	"END   lookup ")	\
 	EM(fscache_access_lookup_cookie_end_failed,"END   lookupf")	\
@@ -385,6 +391,25 @@ TRACE_EVENT(fscache_relinquish,
 		      __entry->n_active, __entry->flags, __entry->retire)
 	    );
 
+TRACE_EVENT(fscache_invalidate,
+	    TP_PROTO(struct fscache_cookie *cookie, loff_t new_size),
+
+	    TP_ARGS(cookie, new_size),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		cookie		)
+		    __field(loff_t,			new_size	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie	= cookie->debug_id;
+		    __entry->new_size	= new_size;
+			   ),
+
+	    TP_printk("c=%08x sz=%llx",
+		      __entry->cookie, __entry->new_size)
+	    );
+
 #endif /* _TRACE_FSCACHE_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From d64f4554dd177c5891c02424a8d9e80590b55b35 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 14:06:34 +0100
Subject: fscache: Provide a means to begin an operation

Provide a function to begin a read operation:

	int fscache_begin_read_operation(
		struct netfs_cache_resources *cres,
		struct fscache_cookie *cookie)

This is primarily intended to be called by network filesystems on behalf of
netfslib, but may also be called to use the I/O access functions directly.
It attaches the resources required by the cache to cres struct from the
supplied cookie.

This holds access to the cache behind the cookie for the duration of the
operation and forces cache withdrawal and cookie invalidation to perform
synchronisation on the operation.  cres->inval_counter is set from the
cookie at this point so that it can be compared at the end of the
operation.

Note that this does not guarantee that the cache state is fully set up and
able to perform I/O immediately; looking up and creation may be left in
progress in the background.  The operations intended to be called by the
network filesystem, such as reading and writing, are expected to wait for
the cookie to move to the correct state.

This will, however, potentially sleep, waiting for a certain minimum state
to be set or for operations such as invalidate to advance far enough that
I/O can resume.


Also provide a function for the cache to call to wait for the cache object
to get to a state where it can be used for certain things:

	bool fscache_wait_for_operation(struct netfs_cache_resources *cres,
					enum fscache_want_stage stage);

This looks at the cache resources provided by the begin function and waits
for them to get to an appropriate stage.  There's a choice of wanting just
some parameters (FSCACHE_WANT_PARAM) or the ability to do I/O
(FSCACHE_WANT_READ or FSCACHE_WANT_WRITE).

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819603692.215744.146724961588817028.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906910672.143852.13856103384424986357.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967110245.1823006.2239170567540431836.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021513617.640689.16627329360866150606.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache-cache.h  | 11 ++++++++++
 include/linux/fscache.h        | 49 ++++++++++++++++++++++++++++++++++++++++++
 include/trace/events/fscache.h |  6 ++++++
 3 files changed, 66 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 1ad56bfd9d72..566497cf5f13 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -67,6 +67,10 @@ struct fscache_cache_ops {
 	/* Invalidate an object */
 	bool (*invalidate_cookie)(struct fscache_cookie *cookie);
 
+	/* Begin an operation for the netfs lib */
+	bool (*begin_operation)(struct netfs_cache_resources *cres,
+				enum fscache_want_state want_state);
+
 	/* Prepare to write to a live cache object */
 	void (*prepare_to_write)(struct fscache_cookie *cookie);
 };
@@ -101,6 +105,8 @@ extern void fscache_end_cookie_access(struct fscache_cookie *cookie,
 extern void fscache_cookie_lookup_negative(struct fscache_cookie *cookie);
 extern void fscache_resume_after_invalidation(struct fscache_cookie *cookie);
 extern void fscache_caching_failed(struct fscache_cookie *cookie);
+extern bool fscache_wait_for_operation(struct netfs_cache_resources *cred,
+				       enum fscache_want_state state);
 
 /**
  * fscache_cookie_state - Read the state of a cookie
@@ -129,4 +135,9 @@ static inline void *fscache_get_key(struct fscache_cookie *cookie)
 		return cookie->key;
 }
 
+static inline struct fscache_cookie *fscache_cres_cookie(struct netfs_cache_resources *cres)
+{
+	return cres->cache_priv;
+}
+
 #endif /* _LINUX_FSCACHE_CACHE_H */
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 0f36d1fac237..7cdc63c4fe35 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -41,6 +41,12 @@ struct fscache_cookie;
 
 #define FSCACHE_INVAL_DIO_WRITE		0x01 /* Invalidate due to DIO write */
 
+enum fscache_want_state {
+	FSCACHE_WANT_PARAMS,
+	FSCACHE_WANT_WRITE,
+	FSCACHE_WANT_READ,
+};
+
 /*
  * Data object state.
  */
@@ -157,6 +163,7 @@ extern void __fscache_use_cookie(struct fscache_cookie *, bool);
 extern void __fscache_unuse_cookie(struct fscache_cookie *, const void *, const loff_t *);
 extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool);
 extern void __fscache_invalidate(struct fscache_cookie *, const void *, loff_t, unsigned int);
+extern int __fscache_begin_read_operation(struct netfs_cache_resources *, struct fscache_cookie *);
 
 /**
  * fscache_acquire_volume - Register a volume as desiring caching services
@@ -358,4 +365,46 @@ void fscache_invalidate(struct fscache_cookie *cookie,
 		__fscache_invalidate(cookie, aux_data, size, flags);
 }
 
+/**
+ * fscache_operation_valid - Return true if operations resources are usable
+ * @cres: The resources to check.
+ *
+ * Returns a pointer to the operations table if usable or NULL if not.
+ */
+static inline
+const struct netfs_cache_ops *fscache_operation_valid(const struct netfs_cache_resources *cres)
+{
+	return fscache_resources_valid(cres) ? cres->ops : NULL;
+}
+
+/**
+ * fscache_begin_read_operation - Begin a read operation for the netfs lib
+ * @cres: The cache resources for the read being performed
+ * @cookie: The cookie representing the cache object
+ *
+ * Begin a read operation on behalf of the netfs helper library.  @cres
+ * indicates the cache resources to which the operation state should be
+ * attached; @cookie indicates the cache object that will be accessed.
+ *
+ * This is intended to be called from the ->begin_cache_operation() netfs lib
+ * operation as implemented by the network filesystem.
+ *
+ * @cres->inval_counter is set from @cookie->inval_counter for comparison at
+ * the end of the operation.  This allows invalidation during the operation to
+ * be detected by the caller.
+ *
+ * Returns:
+ * * 0		- Success
+ * * -ENOBUFS	- No caching available
+ * * Other error code from the cache, such as -ENOMEM.
+ */
+static inline
+int fscache_begin_read_operation(struct netfs_cache_resources *cres,
+				 struct fscache_cookie *cookie)
+{
+	if (fscache_cookie_enabled(cookie))
+		return __fscache_begin_read_operation(cres, cookie);
+	return -ENOBUFS;
+}
+
 #endif /* _LINUX_FSCACHE_H */
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index 294792881434..9f78c903b00a 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -76,6 +76,9 @@ enum fscache_access_trace {
 	fscache_access_cache_unpin,
 	fscache_access_invalidate_cookie,
 	fscache_access_invalidate_cookie_end,
+	fscache_access_io_not_live,
+	fscache_access_io_read,
+	fscache_access_io_wait,
 	fscache_access_lookup_cookie,
 	fscache_access_lookup_cookie_end,
 	fscache_access_lookup_cookie_end_failed,
@@ -143,6 +146,9 @@ enum fscache_access_trace {
 	EM(fscache_access_cache_unpin,		"UNPIN cache  ")	\
 	EM(fscache_access_invalidate_cookie,	"BEGIN inval  ")	\
 	EM(fscache_access_invalidate_cookie_end,"END   inval  ")	\
+	EM(fscache_access_io_not_live,		"END   io_notl")	\
+	EM(fscache_access_io_read,		"BEGIN io_read")	\
+	EM(fscache_access_io_wait,		"WAIT  io     ")	\
 	EM(fscache_access_lookup_cookie,	"BEGIN lookup ")	\
 	EM(fscache_access_lookup_cookie_end,	"END   lookup ")	\
 	EM(fscache_access_lookup_cookie_end_failed,"END   lookupf")	\
-- 
cgit v1.2.3


From cdf262f29488e6c3432911ec487ea41918fcbcd7 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 11 Nov 2021 23:14:29 +0000
Subject: fscache: Count data storage objects in a cache

Count the data storage objects that are currently allocated in a cache.
This is used to pin certain cache structures until cache withdrawal is
complete.

Three helpers are provided to manage and make use of the count:

 (1) void fscache_count_object(struct fscache_cache *cache);

     This should be called by the cache backend to note that an object has
     been allocated and attached to the cache.

 (2) void fscache_uncount_object(struct fscache_cache *cache);

     This should be called by the backend to note that an object has been
     destroyed.  This sends a wakeup event that allows cache withdrawal to
     proceed if it was waiting for that object.

 (3) void fscache_wait_for_objects(struct fscache_cache *cache);

     This can be used by the backend to wait for all outstanding cache
     object to be destroyed.

Each cache's counter is displayed as part of /proc/fs/fscache/caches.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819608594.215744.1812706538117388252.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906911646.143852.168184059935530127.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967111846.1823006.9868154941573671255.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021516219.640689.4934796654308958158.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache-cache.h | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 566497cf5f13..337335d7a5e2 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -76,6 +76,7 @@ struct fscache_cache_ops {
 };
 
 extern struct workqueue_struct *fscache_wq;
+extern wait_queue_head_t fscache_clearance_waiters;
 
 /*
  * out-of-line cache backend functions
@@ -140,4 +141,42 @@ static inline struct fscache_cookie *fscache_cres_cookie(struct netfs_cache_reso
 	return cres->cache_priv;
 }
 
+/**
+ * fscache_count_object - Tell fscache that an object has been added
+ * @cache: The cache to account to
+ *
+ * Tell fscache that an object has been added to the cache.  This prevents the
+ * cache from tearing down the cache structure until the object is uncounted.
+ */
+static inline void fscache_count_object(struct fscache_cache *cache)
+{
+	atomic_inc(&cache->object_count);
+}
+
+/**
+ * fscache_uncount_object - Tell fscache that an object has been removed
+ * @cache: The cache to account to
+ *
+ * Tell fscache that an object has been removed from the cache and will no
+ * longer be accessed.  After this point, the cache cookie may be destroyed.
+ */
+static inline void fscache_uncount_object(struct fscache_cache *cache)
+{
+	if (atomic_dec_and_test(&cache->object_count))
+		wake_up_all(&fscache_clearance_waiters);
+}
+
+/**
+ * fscache_wait_for_objects - Wait for all objects to be withdrawn
+ * @cache: The cache to query
+ *
+ * Wait for all extant objects in a cache to finish being withdrawn
+ * and go away.
+ */
+static inline void fscache_wait_for_objects(struct fscache_cache *cache)
+{
+	wait_event(fscache_clearance_waiters,
+		   atomic_read(&cache->object_count) == 0);
+}
+
 #endif /* _LINUX_FSCACHE_CACHE_H */
-- 
cgit v1.2.3


From 8e7a867bb7309fbf47e8c2a68798b919fc02523f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 23:06:16 +0100
Subject: fscache: Provide read/write stat counters for the cache

Provide read/write stat counters for the cache backend to use.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819609532.215744.10821082637727410554.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906912598.143852.12960327989649429069.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967113830.1823006.3222957649202368162.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021517502.640689.6077928311710357342.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache-cache.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 337335d7a5e2..796c8b5c5305 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -179,4 +179,14 @@ static inline void fscache_wait_for_objects(struct fscache_cache *cache)
 		   atomic_read(&cache->object_count) == 0);
 }
 
+#ifdef CONFIG_FSCACHE_STATS
+extern atomic_t fscache_n_read;
+extern atomic_t fscache_n_write;
+#define fscache_count_read() atomic_inc(&fscache_n_read)
+#define fscache_count_write() atomic_inc(&fscache_n_write)
+#else
+#define fscache_count_read() do {} while(0)
+#define fscache_count_write() do {} while(0)
+#endif
+
 #endif /* _LINUX_FSCACHE_CACHE_H */
-- 
cgit v1.2.3


From ed1235eb78a7421cd0ac2ad09e931f8f07ccdc7c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 23:10:46 +0100
Subject: fscache: Provide a function to let the netfs update its coherency
 data

Provide a function to let the netfs update its coherency data:

	void fscache_update_cookie(struct fscache_cookie *cookie,
				   const void *aux_data,
				   const loff_t *object_size);

This will update the auxiliary data and/or the size of the object attached
to a cookie if either pointer is not-NULL and flag that the disk needs to
be updated.

Note that fscache_unuse_cookie() also allows this to be done.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819610438.215744.4223265964131424954.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906913530.143852.18150303220217653820.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967117795.1823006.7493373142653442595.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021518440.640689.6369952464473039268.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 7cdc63c4fe35..fc77648c8af6 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -338,6 +338,28 @@ void __fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data
 	set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &cookie->flags);
 }
 
+/**
+ * fscache_update_cookie - Request that a cache object be updated
+ * @cookie: The cookie representing the cache object
+ * @aux_data: The updated auxiliary data for the cookie (may be NULL)
+ * @object_size: The current size of the object (may be NULL)
+ *
+ * Request an update of the index data for the cache object associated with the
+ * cookie.  The auxiliary data on the cookie will be updated first if @aux_data
+ * is set and the object size will be updated and the object possibly trimmed
+ * if @object_size is set.
+ *
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
+ * description.
+ */
+static inline
+void fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data,
+			   const loff_t *object_size)
+{
+	if (fscache_cookie_enabled(cookie))
+		__fscache_update_cookie(cookie, aux_data, object_size);
+}
+
 /**
  * fscache_invalidate - Notify cache that an object needs invalidation
  * @cookie: The cookie representing the cache object
-- 
cgit v1.2.3


From 3a11b3a86366ccbf0818b088ffecadf8b2d61177 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 14 Sep 2021 09:47:45 +0100
Subject: netfs: Pass more information on how to deal with a hole in the cache

Pass more information to the cache on how to deal with a hole if it
encounters one when trying to read from the cache.  Three options are
provided:

 (1) NETFS_READ_HOLE_IGNORE.  Read the hole along with the data, assuming
     it to be a punched-out extent by the backing filesystem.

 (2) NETFS_READ_HOLE_CLEAR.  If there's a hole, erase the requested region
     of the cache and clear the read buffer.

 (3) NETFS_READ_HOLE_FAIL.  Fail the read if a hole is detected.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819612321.215744.9738308885948264476.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906914460.143852.6284247083607910189.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967119923.1823006.15637375885194297582.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021519762.640689.16994364383313159319.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/netfs.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 5a46fde65759..b46c39d98bbd 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -196,6 +196,15 @@ struct netfs_read_request_ops {
 	void (*cleanup)(struct address_space *mapping, void *netfs_priv);
 };
 
+/*
+ * How to handle reading from a hole.
+ */
+enum netfs_read_from_hole {
+	NETFS_READ_HOLE_IGNORE,
+	NETFS_READ_HOLE_CLEAR,
+	NETFS_READ_HOLE_FAIL,
+};
+
 /*
  * Table of operations for access to a cache.  This is obtained by
  * rreq->ops->begin_cache_operation().
@@ -208,7 +217,7 @@ struct netfs_cache_ops {
 	int (*read)(struct netfs_cache_resources *cres,
 		    loff_t start_pos,
 		    struct iov_iter *iter,
-		    bool seek_data,
+		    enum netfs_read_from_hole read_hole,
 		    netfs_io_terminated_t term_func,
 		    void *term_func_priv);
 
-- 
cgit v1.2.3


From 9af1c6c3089b294ffa240e0fbba356666698b6d0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 14:06:34 +0100
Subject: fscache: Implement raw I/O interface

Provide a pair of functions to perform raw I/O on the cache.  The first
function allows an arbitrary asynchronous direct-IO read to be made against
a cache object, though the read should be aligned and sized appropriately
for the backing device:

        int fscache_read(struct netfs_cache_resources *cres,
                         loff_t start_pos,
                         struct iov_iter *iter,
                         enum netfs_read_from_hole read_hole,
                         netfs_io_terminated_t term_func,
                         void *term_func_priv);

The cache resources must have been previously initialised by
fscache_begin_read_operation().  A read operation is sent to the backing
filesystem, starting at start_pos within the file.  The size of the read is
specified by the iterator, as is the location of the output buffer.

If there is a hole in the data it can be ignored and left to the backing
filesystem to deal with (NETFS_READ_HOLE_IGNORE), a hole at the beginning
can be skipped over and the buffer padded with zeros
(NETFS_READ_HOLE_CLEAR) or -ENODATA can be given (NETFS_READ_HOLE_FAIL).

If term_func is not NULL, the operation may be performed asynchronously.
Upon completion, successful or otherwise, (*term_func)() will be called and
passed term_func_priv, along with an error or the amount of data
transferred.  If the op is run asynchronously, fscache_read() will return
-EIOCBQUEUED.

The second function allows an arbitrary asynchronous direct-IO write to be
made against a cache object, though the write should be aligned and sized
appropriately for the backing device:

        int fscache_write(struct netfs_cache_resources *cres,
                          loff_t start_pos,
                          struct iov_iter *iter,
                          netfs_io_terminated_t term_func,
                          void *term_func_priv);

This works in very similar way to fscache_read(), except that there's no
need to deal with holes (they're just overwritten).

The caller is responsible for preventing concurrent overlapping writes.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819613224.215744.7877577215582621254.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906915386.143852.16936177636106480724.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967122632.1823006.7487049517698562172.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021521420.640689.12747258780542678309.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache.h        | 74 ++++++++++++++++++++++++++++++++++++++++++
 include/trace/events/fscache.h |  2 ++
 2 files changed, 76 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index fc77648c8af6..ae753cae0fdd 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -429,4 +429,78 @@ int fscache_begin_read_operation(struct netfs_cache_resources *cres,
 	return -ENOBUFS;
 }
 
+/**
+ * fscache_read - Start a read from the cache.
+ * @cres: The cache resources to use
+ * @start_pos: The beginning file offset in the cache file
+ * @iter: The buffer to fill - and also the length
+ * @read_hole: How to handle a hole in the data.
+ * @term_func: The function to call upon completion
+ * @term_func_priv: The private data for @term_func
+ *
+ * Start a read from the cache.  @cres indicates the cache object to read from
+ * and must be obtained by a call to fscache_begin_operation() beforehand.
+ *
+ * The data is read into the iterator, @iter, and that also indicates the size
+ * of the operation.  @start_pos is the start position in the file, though if
+ * @seek_data is set appropriately, the cache can use SEEK_DATA to find the
+ * next piece of data, writing zeros for the hole into the iterator.
+ *
+ * Upon termination of the operation, @term_func will be called and supplied
+ * with @term_func_priv plus the amount of data written, if successful, or the
+ * error code otherwise.
+ *
+ * @read_hole indicates how a partially populated region in the cache should be
+ * handled.  It can be one of a number of settings:
+ *
+ *	NETFS_READ_HOLE_IGNORE - Just try to read (may return a short read).
+ *
+ *	NETFS_READ_HOLE_CLEAR - Seek for data, clearing the part of the buffer
+ *				skipped over, then do as for IGNORE.
+ *
+ *	NETFS_READ_HOLE_FAIL - Give ENODATA if we encounter a hole.
+ */
+static inline
+int fscache_read(struct netfs_cache_resources *cres,
+		 loff_t start_pos,
+		 struct iov_iter *iter,
+		 enum netfs_read_from_hole read_hole,
+		 netfs_io_terminated_t term_func,
+		 void *term_func_priv)
+{
+	const struct netfs_cache_ops *ops = fscache_operation_valid(cres);
+	return ops->read(cres, start_pos, iter, read_hole,
+			 term_func, term_func_priv);
+}
+
+/**
+ * fscache_write - Start a write to the cache.
+ * @cres: The cache resources to use
+ * @start_pos: The beginning file offset in the cache file
+ * @iter: The data to write - and also the length
+ * @term_func: The function to call upon completion
+ * @term_func_priv: The private data for @term_func
+ *
+ * Start a write to the cache.  @cres indicates the cache object to write to and
+ * must be obtained by a call to fscache_begin_operation() beforehand.
+ *
+ * The data to be written is obtained from the iterator, @iter, and that also
+ * indicates the size of the operation.  @start_pos is the start position in
+ * the file.
+ *
+ * Upon termination of the operation, @term_func will be called and supplied
+ * with @term_func_priv plus the amount of data written, if successful, or the
+ * error code otherwise.
+ */
+static inline
+int fscache_write(struct netfs_cache_resources *cres,
+		  loff_t start_pos,
+		  struct iov_iter *iter,
+		  netfs_io_terminated_t term_func,
+		  void *term_func_priv)
+{
+	const struct netfs_cache_ops *ops = fscache_operation_valid(cres);
+	return ops->write(cres, start_pos, iter, term_func, term_func_priv);
+}
+
 #endif /* _LINUX_FSCACHE_H */
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index 9f78c903b00a..2459d75659cf 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -79,6 +79,7 @@ enum fscache_access_trace {
 	fscache_access_io_not_live,
 	fscache_access_io_read,
 	fscache_access_io_wait,
+	fscache_access_io_write,
 	fscache_access_lookup_cookie,
 	fscache_access_lookup_cookie_end,
 	fscache_access_lookup_cookie_end_failed,
@@ -149,6 +150,7 @@ enum fscache_access_trace {
 	EM(fscache_access_io_not_live,		"END   io_notl")	\
 	EM(fscache_access_io_read,		"BEGIN io_read")	\
 	EM(fscache_access_io_wait,		"WAIT  io     ")	\
+	EM(fscache_access_io_write,		"BEGIN io_writ")	\
 	EM(fscache_access_lookup_cookie,	"BEGIN lookup ")	\
 	EM(fscache_access_lookup_cookie_end,	"END   lookup ")	\
 	EM(fscache_access_lookup_cookie_end_failed,"END   lookupf")	\
-- 
cgit v1.2.3


From b6e16652d6c0e4f9e9b120f66966ec153f0623fc Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 14:06:34 +0100
Subject: fscache: Implement higher-level write I/O interface

Provide a higher-level function than fscache_write() to perform a write
from an inode's pagecache to the cache, whilst fending off concurrent
writes by means of the PG_fscache mark on a page:

	void fscache_write_to_cache(struct fscache_cookie *cookie,
				    struct address_space *mapping,
				    loff_t start,
				    size_t len,
				    loff_t i_size,
				    netfs_io_terminated_t term_func,
				    void *term_func_priv,
				    bool caching);

If caching is false, this function does nothing except call (*term_func)()
if given.  It assumes that, in such a case, PG_fscache will not have been
set on the pages.

Otherwise, if caching is true, this function requires the source pages to
have had PG_fscache set on them before calling.  start and len define the
region of the file to be modified and i_size indicates the new file size.
The source pages are extracted from the mapping.

term_func and term_func_priv work as for fscache_write().  The PG_fscache
marks will be cleared at the end of the operation, before term_func is
called or the function otherwise returns.

There is an additonal helper function to clear the PG_fscache bits from a
range of pages:

	void fscache_clear_page_bits(struct fscache_cookie *cookie,
				     struct address_space *mapping,
				     loff_t start, size_t len,
				     bool caching);

If caching is true, the pages to be managed are expected to be located on
mapping in the range defined by start and len.  If caching is false, it
does nothing.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819614155.215744.5528123235123721230.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906916346.143852.15632773570362489926.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967123599.1823006.12946816026724657428.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021522672.640689.4381958316198807813.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index ae753cae0fdd..9d469613e16c 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -165,6 +165,11 @@ extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool);
 extern void __fscache_invalidate(struct fscache_cookie *, const void *, loff_t, unsigned int);
 extern int __fscache_begin_read_operation(struct netfs_cache_resources *, struct fscache_cookie *);
 
+extern void __fscache_write_to_cache(struct fscache_cookie *, struct address_space *,
+				     loff_t, size_t, loff_t, netfs_io_terminated_t, void *,
+				     bool);
+extern void __fscache_clear_page_bits(struct address_space *, loff_t, size_t);
+
 /**
  * fscache_acquire_volume - Register a volume as desiring caching services
  * @volume_key: An identification string for the volume
@@ -503,4 +508,62 @@ int fscache_write(struct netfs_cache_resources *cres,
 	return ops->write(cres, start_pos, iter, term_func, term_func_priv);
 }
 
+/**
+ * fscache_clear_page_bits - Clear the PG_fscache bits from a set of pages
+ * @cookie: The cookie representing the cache object
+ * @mapping: The netfs inode to use as the source
+ * @start: The start position in @mapping
+ * @len: The amount of data to unlock
+ * @caching: If PG_fscache has been set
+ *
+ * Clear the PG_fscache flag from a sequence of pages and wake up anyone who's
+ * waiting.
+ */
+static inline void fscache_clear_page_bits(struct fscache_cookie *cookie,
+					   struct address_space *mapping,
+					   loff_t start, size_t len,
+					   bool caching)
+{
+	if (caching)
+		__fscache_clear_page_bits(mapping, start, len);
+}
+
+/**
+ * fscache_write_to_cache - Save a write to the cache and clear PG_fscache
+ * @cookie: The cookie representing the cache object
+ * @mapping: The netfs inode to use as the source
+ * @start: The start position in @mapping
+ * @len: The amount of data to write back
+ * @i_size: The new size of the inode
+ * @term_func: The function to call upon completion
+ * @term_func_priv: The private data for @term_func
+ * @caching: If PG_fscache has been set
+ *
+ * Helper function for a netfs to write dirty data from an inode into the cache
+ * object that's backing it.
+ *
+ * @start and @len describe the range of the data.  This does not need to be
+ * page-aligned, but to satisfy DIO requirements, the cache may expand it up to
+ * the page boundaries on either end.  All the pages covering the range must be
+ * marked with PG_fscache.
+ *
+ * If given, @term_func will be called upon completion and supplied with
+ * @term_func_priv.  Note that the PG_fscache flags will have been cleared by
+ * this point, so the netfs must retain its own pin on the mapping.
+ */
+static inline void fscache_write_to_cache(struct fscache_cookie *cookie,
+					  struct address_space *mapping,
+					  loff_t start, size_t len, loff_t i_size,
+					  netfs_io_terminated_t term_func,
+					  void *term_func_priv,
+					  bool caching)
+{
+	if (caching)
+		__fscache_write_to_cache(cookie, mapping, start, len, i_size,
+					 term_func, term_func_priv, caching);
+	else if (term_func)
+		term_func(term_func_priv, -ENOBUFS, false);
+
+}
+
 #endif /* _LINUX_FSCACHE_H */
-- 
cgit v1.2.3


From 08276bdae68b022a7726edf7416b6748e3df5395 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 23:50:01 +0100
Subject: vfs, fscache: Implement pinning of cache usage for writeback

Cachefiles has a problem in that it needs to keep the backing file for a
cookie open whilst there are local modifications pending that need to be
written to it.  However, we don't want to keep the file open indefinitely,
as that causes EMFILE/ENFILE/ENOMEM problems.

Reopening the cache file, however, is a problem if this is being done due
to writeback triggered by exit().  Some filesystems will oops if we try to
open a file in that context because they want to access current->fs or
other resources that have already been dismantled.

To get around this, I added the following:

 (1) An inode flag, I_PINNING_FSCACHE_WB, to be set on a network filesystem
     inode to indicate that we have a usage count on the cookie caching
     that inode.

 (2) A flag in struct writeback_control, unpinned_fscache_wb, that is set
     when __writeback_single_inode() clears the last dirty page from
     i_pages - at which point it clears I_PINNING_FSCACHE_WB and sets this
     flag.

     This has to be done here so that clearing I_PINNING_FSCACHE_WB can be
     done atomically with the check of PAGECACHE_TAG_DIRTY that clears
     I_DIRTY_PAGES.

 (3) A function, fscache_set_page_dirty(), which if it is not set, sets
     I_PINNING_FSCACHE_WB and calls fscache_use_cookie() to pin the cache
     resources.

 (4) A function, fscache_unpin_writeback(), to be called by ->write_inode()
     to unuse the cookie.

 (5) A function, fscache_clear_inode_writeback(), to be called when the
     inode is evicted, before clear_inode() is called.  This cleans up any
     lingering I_PINNING_FSCACHE_WB.

The network filesystem can then use these tools to make sure that
fscache_write_to_cache() can write locally modified data to the cache as
well as to the server.

For the future, I'm working on write helpers for netfs lib that should
allow this facility to be removed by keeping track of the dirty regions
separately - but that's incomplete at the moment and is also going to be
affected by folios, one way or another, since it deals with pages

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819615157.215744.17623791756928043114.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906917856.143852.8224898306177154573.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967124567.1823006.14188359004568060298.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021524705.640689.17824932021727663017.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fs.h        |  3 +++
 include/linux/fscache.h   | 41 +++++++++++++++++++++++++++++++++++++++++
 include/linux/writeback.h |  1 +
 3 files changed, 45 insertions(+)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index bbf812ce89a8..2c0b8e77d9ab 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2418,6 +2418,8 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
  *			Used to detect that mark_inode_dirty() should not move
  * 			inode between dirty lists.
  *
+ * I_PINNING_FSCACHE_WB	Inode is pinning an fscache object for writeback.
+ *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
  */
 #define I_DIRTY_SYNC		(1 << 0)
@@ -2440,6 +2442,7 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
 #define I_CREATING		(1 << 15)
 #define I_DONTCACHE		(1 << 16)
 #define I_SYNC_QUEUED		(1 << 17)
+#define I_PINNING_FSCACHE_WB	(1 << 18)
 
 #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
 #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 9d469613e16c..18e725671594 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -16,6 +16,7 @@
 
 #include <linux/fs.h>
 #include <linux/netfs.h>
+#include <linux/writeback.h>
 
 #if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE)
 #define __fscache_available (1)
@@ -566,4 +567,44 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie,
 
 }
 
+#if __fscache_available
+extern int fscache_set_page_dirty(struct page *page, struct fscache_cookie *cookie);
+#else
+#define fscache_set_page_dirty(PAGE, COOKIE) (__set_page_dirty_nobuffers((PAGE)))
+#endif
+
+/**
+ * fscache_unpin_writeback - Unpin writeback resources
+ * @wbc: The writeback control
+ * @cookie: The cookie referring to the cache object
+ *
+ * Unpin the writeback resources pinned by fscache_set_page_dirty().  This is
+ * intended to be called by the netfs's ->write_inode() method.
+ */
+static inline void fscache_unpin_writeback(struct writeback_control *wbc,
+					   struct fscache_cookie *cookie)
+{
+	if (wbc->unpinned_fscache_wb)
+		fscache_unuse_cookie(cookie, NULL, NULL);
+}
+
+/**
+ * fscache_clear_inode_writeback - Clear writeback resources pinned by an inode
+ * @cookie: The cookie referring to the cache object
+ * @inode: The inode to clean up
+ * @aux: Auxiliary data to apply to the inode
+ *
+ * Clear any writeback resources held by an inode when the inode is evicted.
+ * This must be called before clear_inode() is called.
+ */
+static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie,
+						 struct inode *inode,
+						 const void *aux)
+{
+	if (inode->i_state & I_PINNING_FSCACHE_WB) {
+		loff_t i_size = i_size_read(inode);
+		fscache_unuse_cookie(cookie, aux, &i_size);
+	}
+}
+
 #endif /* _LINUX_FSCACHE_H */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 3bfd487d1dd2..fec248ab1fec 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -68,6 +68,7 @@ struct writeback_control {
 	unsigned for_reclaim:1;		/* Invoked from the page allocator */
 	unsigned range_cyclic:1;	/* range_start is cyclic */
 	unsigned for_sync:1;		/* sync(2) WB_SYNC_ALL writeback */
+	unsigned unpinned_fscache_wb:1;	/* Cleared I_PINNING_FSCACHE_WB */
 
 	/*
 	 * When writeback IOs are bounced through async layers, only the
-- 
cgit v1.2.3


From 1f67e6d0b18853c641d861a671f46a4964a88510 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 14:06:34 +0100
Subject: fscache: Provide a function to note the release of a page

Provide a function to be called from a network filesystem's releasepage
method to indicate that a page has been released that might have been a
reflection of data upon the server - and now that data must be reloaded
from the server or the cache.

This is used to end an optimisation for empty files, in particular files
that have just been created locally, whereby we know there cannot yet be
any data that we would need to read from the server or the cache.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819617128.215744.4725572296135656508.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906920354.143852.7511819614661372008.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967128061.1823006.611781655060034988.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021525963.640689.9264556596205140044.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 18e725671594..28ce258c1f87 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -607,4 +607,20 @@ static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie,
 	}
 }
 
+/**
+ * fscache_note_page_release - Note that a netfs page got released
+ * @cookie: The cookie corresponding to the file
+ *
+ * Note that a page that has been copied to the cache has been released.  This
+ * means that future reads will need to look in the cache to see if it's there.
+ */
+static inline
+void fscache_note_page_release(struct fscache_cookie *cookie)
+{
+	if (cookie &&
+	    test_bit(FSCACHE_COOKIE_HAVE_DATA, &cookie->flags) &&
+	    test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags))
+		clear_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags);
+}
+
 #endif /* _LINUX_FSCACHE_H */
-- 
cgit v1.2.3


From 16a96bdf92d5af06f9fa6a01a4b08e2fdfed2e5b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 20 Oct 2021 14:06:34 +0100
Subject: fscache: Provide a function to resize a cookie

Provide a function to change the size of the storage attached to a cookie,
to match the size of the file being cached when it's changed by truncate or
fallocate:

	void fscache_resize_cookie(struct fscache_cookie *cookie,
				   loff_t new_size);

This acts synchronously and is expected to run under the inode lock of the
caller.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819621839.215744.7895597119803515402.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906922387.143852.16394459879816147793.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967128998.1823006.10740669081985775576.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021527861.640689.3466382085497236267.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache-cache.h  |  4 ++++
 include/linux/fscache.h        | 18 ++++++++++++++++++
 include/trace/events/fscache.h | 25 +++++++++++++++++++++++++
 3 files changed, 47 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 796c8b5c5305..3fa4902dc87c 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -64,6 +64,10 @@ struct fscache_cache_ops {
 	/* Withdraw an object without any cookie access counts held */
 	void (*withdraw_cookie)(struct fscache_cookie *cookie);
 
+	/* Change the size of a data object */
+	void (*resize_cookie)(struct netfs_cache_resources *cres,
+			      loff_t new_size);
+
 	/* Invalidate an object */
 	bool (*invalidate_cookie)(struct fscache_cookie *cookie);
 
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 28ce258c1f87..86b1c0db1de5 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -163,6 +163,7 @@ extern struct fscache_cookie *__fscache_acquire_cookie(
 extern void __fscache_use_cookie(struct fscache_cookie *, bool);
 extern void __fscache_unuse_cookie(struct fscache_cookie *, const void *, const loff_t *);
 extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool);
+extern void __fscache_resize_cookie(struct fscache_cookie *, loff_t);
 extern void __fscache_invalidate(struct fscache_cookie *, const void *, loff_t, unsigned int);
 extern int __fscache_begin_read_operation(struct netfs_cache_resources *, struct fscache_cookie *);
 
@@ -366,6 +367,23 @@ void fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data,
 		__fscache_update_cookie(cookie, aux_data, object_size);
 }
 
+/**
+ * fscache_resize_cookie - Request that a cache object be resized
+ * @cookie: The cookie representing the cache object
+ * @new_size: The new size of the object (may be NULL)
+ *
+ * Request that the size of an object be changed.
+ *
+ * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * description.
+ */
+static inline
+void fscache_resize_cookie(struct fscache_cookie *cookie, loff_t new_size)
+{
+	if (fscache_cookie_enabled(cookie))
+		__fscache_resize_cookie(cookie, new_size);
+}
+
 /**
  * fscache_invalidate - Notify cache that an object needs invalidation
  * @cookie: The cookie representing the cache object
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index 2459d75659cf..5fa37a8b4ec7 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -78,6 +78,7 @@ enum fscache_access_trace {
 	fscache_access_invalidate_cookie_end,
 	fscache_access_io_not_live,
 	fscache_access_io_read,
+	fscache_access_io_resize,
 	fscache_access_io_wait,
 	fscache_access_io_write,
 	fscache_access_lookup_cookie,
@@ -149,6 +150,7 @@ enum fscache_access_trace {
 	EM(fscache_access_invalidate_cookie_end,"END   inval  ")	\
 	EM(fscache_access_io_not_live,		"END   io_notl")	\
 	EM(fscache_access_io_read,		"BEGIN io_read")	\
+	EM(fscache_access_io_resize,		"BEGIN io_resz")	\
 	EM(fscache_access_io_wait,		"WAIT  io     ")	\
 	EM(fscache_access_io_write,		"BEGIN io_writ")	\
 	EM(fscache_access_lookup_cookie,	"BEGIN lookup ")	\
@@ -418,6 +420,29 @@ TRACE_EVENT(fscache_invalidate,
 		      __entry->cookie, __entry->new_size)
 	    );
 
+TRACE_EVENT(fscache_resize,
+	    TP_PROTO(struct fscache_cookie *cookie, loff_t new_size),
+
+	    TP_ARGS(cookie, new_size),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		cookie		)
+		    __field(loff_t,			old_size	)
+		    __field(loff_t,			new_size	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie	= cookie->debug_id;
+		    __entry->old_size	= cookie->object_size;
+		    __entry->new_size	= new_size;
+			   ),
+
+	    TP_printk("c=%08x os=%08llx sz=%08llx",
+		      __entry->cookie,
+		      __entry->old_size,
+		      __entry->new_size)
+	    );
+
 #endif /* _TRACE_FSCACHE_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 77443f6171f32626f24b2f97494c71a6bd83831a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 26 Nov 2021 14:32:29 +0000
Subject: cachefiles: Introduce rewritten driver

Introduce basic skeleton of the rewritten cachefiles driver including
config options so that it can be enabled for compilation.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819622766.215744.9108359326983195047.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906923341.143852.3856498104256721447.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967130320.1823006.15791456613198441566.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021528993.640689.9069695476048171884.stgit@warthog.procyon.org.uk/ # v4
---
 include/trace/events/cachefiles.h | 49 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 include/trace/events/cachefiles.h

(limited to 'include')

diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
new file mode 100644
index 000000000000..5ee0aabb20be
--- /dev/null
+++ b/include/trace/events/cachefiles.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* CacheFiles tracepoints
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cachefiles
+
+#if !defined(_TRACE_CACHEFILES_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CACHEFILES_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * Define enums for tracing information.
+ */
+#ifndef __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY
+#define __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY
+
+#endif
+
+/*
+ * Define enum -> string mappings for display.
+ */
+
+
+/*
+ * Export enum symbols via userspace.
+ */
+#undef EM
+#undef E_
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define E_(a, b) TRACE_DEFINE_ENUM(a);
+
+/*
+ * Now redefine the EM() and E_() macros to map the enums to the strings that
+ * will be printed in the output.
+ */
+#undef EM
+#undef E_
+#define EM(a, b)	{ a, b },
+#define E_(a, b)	{ a, b }
+
+
+#endif /* _TRACE_CACHEFILES_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
cgit v1.2.3


From ecf5a6ce15f90d1fe6bc326c720d21fc0e73fc88 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 21 Oct 2021 08:42:18 +0100
Subject: cachefiles: Add a couple of tracepoints for logging errors

Add two trace points to log errors, one for vfs operations like mkdir or
create, and one for I/O operations, like read, write or truncate.

Also add the beginnings of a struct that is going to represent a data file
and place a debugging ID in it for the tracepoints to record.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819625632.215744.17907340966178411033.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906926297.143852.18267924605548658911.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967135390.1823006.2512120406360156424.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021534029.640689.1875723624947577095.stgit@warthog.procyon.org.uk/ # v4
---
 include/trace/events/cachefiles.h | 94 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index 5ee0aabb20be..9bd5a8a60801 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -18,11 +18,49 @@
 #ifndef __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY
 #define __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY
 
+enum cachefiles_error_trace {
+	cachefiles_trace_fallocate_error,
+	cachefiles_trace_getxattr_error,
+	cachefiles_trace_link_error,
+	cachefiles_trace_lookup_error,
+	cachefiles_trace_mkdir_error,
+	cachefiles_trace_notify_change_error,
+	cachefiles_trace_open_error,
+	cachefiles_trace_read_error,
+	cachefiles_trace_remxattr_error,
+	cachefiles_trace_rename_error,
+	cachefiles_trace_seek_error,
+	cachefiles_trace_setxattr_error,
+	cachefiles_trace_statfs_error,
+	cachefiles_trace_tmpfile_error,
+	cachefiles_trace_trunc_error,
+	cachefiles_trace_unlink_error,
+	cachefiles_trace_write_error,
+};
+
 #endif
 
 /*
  * Define enum -> string mappings for display.
  */
+#define cachefiles_error_traces						\
+	EM(cachefiles_trace_fallocate_error,	"fallocate")		\
+	EM(cachefiles_trace_getxattr_error,	"getxattr")		\
+	EM(cachefiles_trace_link_error,		"link")			\
+	EM(cachefiles_trace_lookup_error,	"lookup")		\
+	EM(cachefiles_trace_mkdir_error,	"mkdir")		\
+	EM(cachefiles_trace_notify_change_error, "notify_change")	\
+	EM(cachefiles_trace_open_error,		"open")			\
+	EM(cachefiles_trace_read_error,		"read")			\
+	EM(cachefiles_trace_remxattr_error,	"remxattr")		\
+	EM(cachefiles_trace_rename_error,	"rename")		\
+	EM(cachefiles_trace_seek_error,		"seek")			\
+	EM(cachefiles_trace_setxattr_error,	"setxattr")		\
+	EM(cachefiles_trace_statfs_error,	"statfs")		\
+	EM(cachefiles_trace_tmpfile_error,	"tmpfile")		\
+	EM(cachefiles_trace_trunc_error,	"trunc")		\
+	EM(cachefiles_trace_unlink_error,	"unlink")		\
+	E_(cachefiles_trace_write_error,	"write")
 
 
 /*
@@ -33,6 +71,8 @@
 #define EM(a, b) TRACE_DEFINE_ENUM(a);
 #define E_(a, b) TRACE_DEFINE_ENUM(a);
 
+cachefiles_error_traces;
+
 /*
  * Now redefine the EM() and E_() macros to map the enums to the strings that
  * will be printed in the output.
@@ -43,6 +83,60 @@
 #define E_(a, b)	{ a, b }
 
 
+TRACE_EVENT(cachefiles_vfs_error,
+	    TP_PROTO(struct cachefiles_object *obj, struct inode *backer,
+		     int error, enum cachefiles_error_trace where),
+
+	    TP_ARGS(obj, backer, error, where),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,			obj	)
+		    __field(unsigned int,			backer	)
+		    __field(enum cachefiles_error_trace,	where	)
+		    __field(short,				error	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj ? obj->debug_id : 0;
+		    __entry->backer	= backer->i_ino;
+		    __entry->error	= error;
+		    __entry->where	= where;
+			   ),
+
+	    TP_printk("o=%08x b=%08x %s e=%d",
+		      __entry->obj,
+		      __entry->backer,
+		      __print_symbolic(__entry->where, cachefiles_error_traces),
+		      __entry->error)
+	    );
+
+TRACE_EVENT(cachefiles_io_error,
+	    TP_PROTO(struct cachefiles_object *obj, struct inode *backer,
+		     int error, enum cachefiles_error_trace where),
+
+	    TP_ARGS(obj, backer, error, where),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,			obj	)
+		    __field(unsigned int,			backer	)
+		    __field(enum cachefiles_error_trace,	where	)
+		    __field(short,				error	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj ? obj->debug_id : 0;
+		    __entry->backer	= backer->i_ino;
+		    __entry->error	= error;
+		    __entry->where	= where;
+			   ),
+
+	    TP_printk("o=%08x b=%08x %s e=%d",
+		      __entry->obj,
+		      __entry->backer,
+		      __print_symbolic(__entry->where, cachefiles_error_traces),
+		      __entry->error)
+	    );
+
 #endif /* _TRACE_CACHEFILES_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 1bd9c4e4f0494915b2391f373d25096579f835ff Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 18 Nov 2021 08:58:08 +0000
Subject: vfs, cachefiles: Mark a backing file in use with an inode flag

Use an inode flag, S_KERNEL_FILE, to mark that a backing file is in use by
the kernel to prevent cachefiles or other kernel services from interfering
with that file.

Alter rmdir to reject attempts to remove a directory marked with this flag.
This is used by cachefiles to prevent cachefilesd from removing them.

Using S_SWAPFILE instead isn't really viable as that has other effects in
the I/O paths.

Changes
=======
ver #3:
 - Check for the object pointer being NULL in the tracepoints rather than
   the caller.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819630256.215744.4815885535039369574.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906931596.143852.8642051223094013028.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967141000.1823006.12920680657559677789.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021541207.640689.564689725898537127.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fs.h                |  1 +
 include/trace/events/cachefiles.h | 42 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2c0b8e77d9ab..bcf1ca430139 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2249,6 +2249,7 @@ struct super_operations {
 #define S_ENCRYPTED	(1 << 14) /* Encrypted file (using fs/crypto/) */
 #define S_CASEFOLD	(1 << 15) /* Casefolded file */
 #define S_VERITY	(1 << 16) /* Verity file (using fs/verity/) */
+#define S_KERNEL_FILE	(1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */
 
 /*
  * Note that nosuid etc flags are inode-specific: setting some file-system
diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index 9bd5a8a60801..6331cd29880d 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -83,6 +83,48 @@ cachefiles_error_traces;
 #define E_(a, b)	{ a, b }
 
 
+TRACE_EVENT(cachefiles_mark_active,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct inode *inode),
+
+	    TP_ARGS(obj, inode),
+
+	    /* Note that obj may be NULL */
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		obj		)
+		    __field(ino_t,			inode		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj ? obj->debug_id : 0;
+		    __entry->inode	= inode->i_ino;
+			   ),
+
+	    TP_printk("o=%08x i=%lx",
+		      __entry->obj, __entry->inode)
+	    );
+
+TRACE_EVENT(cachefiles_mark_inactive,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct inode *inode),
+
+	    TP_ARGS(obj, inode),
+
+	    /* Note that obj may be NULL */
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		obj		)
+		    __field(ino_t,			inode		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj ? obj->debug_id : 0;
+		    __entry->inode	= inode->i_ino;
+			   ),
+
+	    TP_printk("o=%08x i=%lx",
+		      __entry->obj, __entry->inode)
+	    );
+
 TRACE_EVENT(cachefiles_vfs_error,
 	    TP_PROTO(struct cachefiles_object *obj, struct inode *backer,
 		     int error, enum cachefiles_error_trace where),
-- 
cgit v1.2.3


From 13871bad1ef7f41947c816a9e342aa9fa8643c5e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 16 Nov 2021 16:30:25 +0000
Subject: cachefiles: Add tracepoints for calls to the VFS

Add tracepoints in cachefiles to monitor when it does various VFS
operations, such as mkdir.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819638517.215744.12773133137536579766.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906938316.143852.17227990869551737803.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967147139.1823006.4909879317496543392.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021546287.640689.3501604495002415631.stgit@warthog.procyon.org.uk/ # v4
---
 include/trace/events/cachefiles.h | 176 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 176 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index 6331cd29880d..5975ea4977b2 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -18,6 +18,21 @@
 #ifndef __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY
 #define __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY
 
+enum fscache_why_object_killed {
+	FSCACHE_OBJECT_IS_STALE,
+	FSCACHE_OBJECT_IS_WEIRD,
+	FSCACHE_OBJECT_INVALIDATED,
+	FSCACHE_OBJECT_NO_SPACE,
+	FSCACHE_OBJECT_WAS_RETIRED,
+	FSCACHE_OBJECT_WAS_CULLED,
+};
+
+enum cachefiles_trunc_trace {
+	cachefiles_trunc_dio_adjust,
+	cachefiles_trunc_expand_tmpfile,
+	cachefiles_trunc_shrink,
+};
+
 enum cachefiles_error_trace {
 	cachefiles_trace_fallocate_error,
 	cachefiles_trace_getxattr_error,
@@ -43,6 +58,19 @@ enum cachefiles_error_trace {
 /*
  * Define enum -> string mappings for display.
  */
+#define cachefiles_obj_kill_traces				\
+	EM(FSCACHE_OBJECT_IS_STALE,	"stale")		\
+	EM(FSCACHE_OBJECT_IS_WEIRD,	"weird")		\
+	EM(FSCACHE_OBJECT_INVALIDATED,	"inval")		\
+	EM(FSCACHE_OBJECT_NO_SPACE,	"no_space")		\
+	EM(FSCACHE_OBJECT_WAS_RETIRED,	"was_retired")		\
+	E_(FSCACHE_OBJECT_WAS_CULLED,	"was_culled")
+
+#define cachefiles_trunc_traces						\
+	EM(cachefiles_trunc_dio_adjust,		"DIOADJ")		\
+	EM(cachefiles_trunc_expand_tmpfile,	"EXPTMP")		\
+	E_(cachefiles_trunc_shrink,		"SHRINK")
+
 #define cachefiles_error_traces						\
 	EM(cachefiles_trace_fallocate_error,	"fallocate")		\
 	EM(cachefiles_trace_getxattr_error,	"getxattr")		\
@@ -71,6 +99,8 @@ enum cachefiles_error_trace {
 #define EM(a, b) TRACE_DEFINE_ENUM(a);
 #define E_(a, b) TRACE_DEFINE_ENUM(a);
 
+cachefiles_obj_kill_traces;
+cachefiles_trunc_traces;
 cachefiles_error_traces;
 
 /*
@@ -83,6 +113,152 @@ cachefiles_error_traces;
 #define E_(a, b)	{ a, b }
 
 
+TRACE_EVENT(cachefiles_lookup,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct dentry *de),
+
+	    TP_ARGS(obj, de),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		obj	)
+		    __field(short,			error	)
+		    __field(unsigned long,		ino	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj->debug_id;
+		    __entry->ino	= (!IS_ERR(de) && d_backing_inode(de) ?
+					   d_backing_inode(de)->i_ino : 0);
+		    __entry->error	= IS_ERR(de) ? PTR_ERR(de) : 0;
+			   ),
+
+	    TP_printk("o=%08x i=%lx e=%d",
+		      __entry->obj, __entry->ino, __entry->error)
+	    );
+
+TRACE_EVENT(cachefiles_tmpfile,
+	    TP_PROTO(struct cachefiles_object *obj, struct inode *backer),
+
+	    TP_ARGS(obj, backer),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,			obj	)
+		    __field(unsigned int,			backer	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj->debug_id;
+		    __entry->backer	= backer->i_ino;
+			   ),
+
+	    TP_printk("o=%08x b=%08x",
+		      __entry->obj,
+		      __entry->backer)
+	    );
+
+TRACE_EVENT(cachefiles_link,
+	    TP_PROTO(struct cachefiles_object *obj, struct inode *backer),
+
+	    TP_ARGS(obj, backer),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,			obj	)
+		    __field(unsigned int,			backer	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj->debug_id;
+		    __entry->backer	= backer->i_ino;
+			   ),
+
+	    TP_printk("o=%08x b=%08x",
+		      __entry->obj,
+		      __entry->backer)
+	    );
+
+TRACE_EVENT(cachefiles_unlink,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct dentry *de,
+		     enum fscache_why_object_killed why),
+
+	    TP_ARGS(obj, de, why),
+
+	    /* Note that obj may be NULL */
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		obj		)
+		    __field(struct dentry *,		de		)
+		    __field(enum fscache_why_object_killed, why		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj ? obj->debug_id : UINT_MAX;
+		    __entry->de		= de;
+		    __entry->why	= why;
+			   ),
+
+	    TP_printk("o=%08x d=%p w=%s",
+		      __entry->obj, __entry->de,
+		      __print_symbolic(__entry->why, cachefiles_obj_kill_traces))
+	    );
+
+TRACE_EVENT(cachefiles_rename,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct dentry *de,
+		     struct dentry *to,
+		     enum fscache_why_object_killed why),
+
+	    TP_ARGS(obj, de, to, why),
+
+	    /* Note that obj may be NULL */
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		obj		)
+		    __field(struct dentry *,		de		)
+		    __field(struct dentry *,		to		)
+		    __field(enum fscache_why_object_killed, why		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj ? obj->debug_id : UINT_MAX;
+		    __entry->de		= de;
+		    __entry->to		= to;
+		    __entry->why	= why;
+			   ),
+
+	    TP_printk("o=%08x d=%p t=%p w=%s",
+		      __entry->obj, __entry->de, __entry->to,
+		      __print_symbolic(__entry->why, cachefiles_obj_kill_traces))
+	    );
+
+TRACE_EVENT(cachefiles_trunc,
+	    TP_PROTO(struct cachefiles_object *obj, struct inode *backer,
+		     loff_t from, loff_t to, enum cachefiles_trunc_trace why),
+
+	    TP_ARGS(obj, backer, from, to, why),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,			obj	)
+		    __field(unsigned int,			backer	)
+		    __field(enum cachefiles_trunc_trace,	why	)
+		    __field(loff_t,				from	)
+		    __field(loff_t,				to	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj->debug_id;
+		    __entry->backer	= backer->i_ino;
+		    __entry->from	= from;
+		    __entry->to		= to;
+		    __entry->why	= why;
+			   ),
+
+	    TP_printk("o=%08x b=%08x %s l=%llx->%llx",
+		      __entry->obj,
+		      __entry->backer,
+		      __print_symbolic(__entry->why, cachefiles_trunc_traces),
+		      __entry->from,
+		      __entry->to)
+	    );
+
 TRACE_EVENT(cachefiles_mark_active,
 	    TP_PROTO(struct cachefiles_object *obj,
 		     struct inode *inode),
-- 
cgit v1.2.3


From df98e87f2091774c377ddfaedfe64bd90ed4bdca Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 17 Nov 2021 16:22:21 +0000
Subject: cachefiles: Implement object lifecycle funcs

Implement allocate, get, see and put functions for the cachefiles_object
struct.  The members of the struct we're going to need are also added.

Additionally, implement a lifecycle tracepoint.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819639457.215744.4600093239395728232.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906939569.143852.3594314410666551982.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967148857.1823006.6332962598220464364.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021547762.640689.8422781599594931000.stgit@warthog.procyon.org.uk/ # v4
---
 include/trace/events/cachefiles.h | 58 +++++++++++++++++++++++++++++++++++++++
 include/trace/events/fscache.h    |  4 +++
 2 files changed, 62 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index 5975ea4977b2..54815cc776ba 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -18,6 +18,21 @@
 #ifndef __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY
 #define __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY
 
+enum cachefiles_obj_ref_trace {
+	cachefiles_obj_get_ioreq,
+	cachefiles_obj_new,
+	cachefiles_obj_put_alloc_fail,
+	cachefiles_obj_put_detach,
+	cachefiles_obj_put_ioreq,
+	cachefiles_obj_see_clean_commit,
+	cachefiles_obj_see_clean_delete,
+	cachefiles_obj_see_clean_drop_tmp,
+	cachefiles_obj_see_lookup_cookie,
+	cachefiles_obj_see_lookup_failed,
+	cachefiles_obj_see_withdraw_cookie,
+	cachefiles_obj_see_withdrawal,
+};
+
 enum fscache_why_object_killed {
 	FSCACHE_OBJECT_IS_STALE,
 	FSCACHE_OBJECT_IS_WEIRD,
@@ -66,6 +81,20 @@ enum cachefiles_error_trace {
 	EM(FSCACHE_OBJECT_WAS_RETIRED,	"was_retired")		\
 	E_(FSCACHE_OBJECT_WAS_CULLED,	"was_culled")
 
+#define cachefiles_obj_ref_traces					\
+	EM(cachefiles_obj_get_ioreq,		"GET ioreq")		\
+	EM(cachefiles_obj_new,			"NEW obj")		\
+	EM(cachefiles_obj_put_alloc_fail,	"PUT alloc_fail")	\
+	EM(cachefiles_obj_put_detach,		"PUT detach")		\
+	EM(cachefiles_obj_put_ioreq,		"PUT ioreq")		\
+	EM(cachefiles_obj_see_clean_commit,	"SEE clean_commit")	\
+	EM(cachefiles_obj_see_clean_delete,	"SEE clean_delete")	\
+	EM(cachefiles_obj_see_clean_drop_tmp,	"SEE clean_drop_tmp")	\
+	EM(cachefiles_obj_see_lookup_cookie,	"SEE lookup_cookie")	\
+	EM(cachefiles_obj_see_lookup_failed,	"SEE lookup_failed")	\
+	EM(cachefiles_obj_see_withdraw_cookie,	"SEE withdraw_cookie")	\
+	E_(cachefiles_obj_see_withdrawal,	"SEE withdrawal")
+
 #define cachefiles_trunc_traces						\
 	EM(cachefiles_trunc_dio_adjust,		"DIOADJ")		\
 	EM(cachefiles_trunc_expand_tmpfile,	"EXPTMP")		\
@@ -100,6 +129,7 @@ enum cachefiles_error_trace {
 #define E_(a, b) TRACE_DEFINE_ENUM(a);
 
 cachefiles_obj_kill_traces;
+cachefiles_obj_ref_traces;
 cachefiles_trunc_traces;
 cachefiles_error_traces;
 
@@ -113,6 +143,34 @@ cachefiles_error_traces;
 #define E_(a, b)	{ a, b }
 
 
+TRACE_EVENT(cachefiles_ref,
+	    TP_PROTO(unsigned int object_debug_id,
+		     unsigned int cookie_debug_id,
+		     int usage,
+		     enum cachefiles_obj_ref_trace why),
+
+	    TP_ARGS(object_debug_id, cookie_debug_id, usage, why),
+
+	    /* Note that obj may be NULL */
+	    TP_STRUCT__entry(
+		    __field(unsigned int,			obj		)
+		    __field(unsigned int,			cookie		)
+		    __field(enum cachefiles_obj_ref_trace,	why		)
+		    __field(int,				usage		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= object_debug_id;
+		    __entry->cookie	= cookie_debug_id;
+		    __entry->usage	= usage;
+		    __entry->why	= why;
+			   ),
+
+	    TP_printk("c=%08x o=%08x u=%d %s",
+		      __entry->cookie, __entry->obj, __entry->usage,
+		      __print_symbolic(__entry->why, cachefiles_obj_ref_traces))
+	    );
+
 TRACE_EVENT(cachefiles_lookup,
 	    TP_PROTO(struct cachefiles_object *obj,
 		     struct dentry *de),
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index 5fa37a8b4ec7..d9d830296ec3 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -49,6 +49,7 @@ enum fscache_volume_trace {
 enum fscache_cookie_trace {
 	fscache_cookie_collision,
 	fscache_cookie_discard,
+	fscache_cookie_get_attach_object,
 	fscache_cookie_get_end_access,
 	fscache_cookie_get_hash_collision,
 	fscache_cookie_get_inval_work,
@@ -57,6 +58,7 @@ enum fscache_cookie_trace {
 	fscache_cookie_new_acquire,
 	fscache_cookie_put_hash_collision,
 	fscache_cookie_put_lru,
+	fscache_cookie_put_object,
 	fscache_cookie_put_over_queued,
 	fscache_cookie_put_relinquish,
 	fscache_cookie_put_withdrawn,
@@ -122,6 +124,7 @@ enum fscache_access_trace {
 #define fscache_cookie_traces						\
 	EM(fscache_cookie_collision,		"*COLLIDE*")		\
 	EM(fscache_cookie_discard,		"DISCARD  ")		\
+	EM(fscache_cookie_get_attach_object,	"GET attch")		\
 	EM(fscache_cookie_get_hash_collision,	"GET hcoll")		\
 	EM(fscache_cookie_get_end_access,	"GQ  endac")		\
 	EM(fscache_cookie_get_inval_work,	"GQ  inval")		\
@@ -130,6 +133,7 @@ enum fscache_access_trace {
 	EM(fscache_cookie_new_acquire,		"NEW acq  ")		\
 	EM(fscache_cookie_put_hash_collision,	"PUT hcoll")		\
 	EM(fscache_cookie_put_lru,		"PUT lru  ")		\
+	EM(fscache_cookie_put_object,		"PUT obj  ")		\
 	EM(fscache_cookie_put_over_queued,	"PQ  overq")		\
 	EM(fscache_cookie_put_relinquish,	"PUT relnq")		\
 	EM(fscache_cookie_put_withdrawn,	"PUT wthdn")		\
-- 
cgit v1.2.3


From 72b957856b0c09eee542afcff29705dd0adda654 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 17 Nov 2021 16:11:07 +0000
Subject: cachefiles: Implement metadata/coherency data storage in xattrs

Use an xattr on each backing file in the cache to store some metadata, such
as the content type and the coherency data.

Five content types are defined:

 (0) No content stored.

 (1) The file contains a single monolithic blob and must be all or nothing.
     This would be used for something like an AFS directory or a symlink.

 (2) The file is populated with content completely up to a point with
     nothing beyond that.

 (3) The file has a map attached and is sparsely populated.  This would be
     stored in one or more additional xattrs.

 (4) The file is dirty, being in the process of local modification and the
     contents are not necessarily represented correctly by the metadata.
     The file should be deleted if this is seen on binding.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819641320.215744.16346770087799536862.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906942248.143852.5423738045012094252.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967151734.1823006.9301249989443622576.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021550471.640689.553853918307994335.stgit@warthog.procyon.org.uk/ # v4
---
 include/trace/events/cachefiles.h | 56 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index 54815cc776ba..98b1eee4a7a8 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -42,6 +42,19 @@ enum fscache_why_object_killed {
 	FSCACHE_OBJECT_WAS_CULLED,
 };
 
+enum cachefiles_coherency_trace {
+	cachefiles_coherency_check_aux,
+	cachefiles_coherency_check_content,
+	cachefiles_coherency_check_dirty,
+	cachefiles_coherency_check_len,
+	cachefiles_coherency_check_objsize,
+	cachefiles_coherency_check_ok,
+	cachefiles_coherency_check_type,
+	cachefiles_coherency_check_xattr,
+	cachefiles_coherency_set_fail,
+	cachefiles_coherency_set_ok,
+};
+
 enum cachefiles_trunc_trace {
 	cachefiles_trunc_dio_adjust,
 	cachefiles_trunc_expand_tmpfile,
@@ -95,6 +108,18 @@ enum cachefiles_error_trace {
 	EM(cachefiles_obj_see_withdraw_cookie,	"SEE withdraw_cookie")	\
 	E_(cachefiles_obj_see_withdrawal,	"SEE withdrawal")
 
+#define cachefiles_coherency_traces					\
+	EM(cachefiles_coherency_check_aux,	"BAD aux ")		\
+	EM(cachefiles_coherency_check_content,	"BAD cont")		\
+	EM(cachefiles_coherency_check_dirty,	"BAD dirt")		\
+	EM(cachefiles_coherency_check_len,	"BAD len ")		\
+	EM(cachefiles_coherency_check_objsize,	"BAD osiz")		\
+	EM(cachefiles_coherency_check_ok,	"OK      ")		\
+	EM(cachefiles_coherency_check_type,	"BAD type")		\
+	EM(cachefiles_coherency_check_xattr,	"BAD xatt")		\
+	EM(cachefiles_coherency_set_fail,	"SET fail")		\
+	E_(cachefiles_coherency_set_ok,		"SET ok  ")
+
 #define cachefiles_trunc_traces						\
 	EM(cachefiles_trunc_dio_adjust,		"DIOADJ")		\
 	EM(cachefiles_trunc_expand_tmpfile,	"EXPTMP")		\
@@ -130,6 +155,7 @@ enum cachefiles_error_trace {
 
 cachefiles_obj_kill_traces;
 cachefiles_obj_ref_traces;
+cachefiles_coherency_traces;
 cachefiles_trunc_traces;
 cachefiles_error_traces;
 
@@ -287,6 +313,36 @@ TRACE_EVENT(cachefiles_rename,
 		      __print_symbolic(__entry->why, cachefiles_obj_kill_traces))
 	    );
 
+TRACE_EVENT(cachefiles_coherency,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     ino_t ino,
+		     enum cachefiles_content content,
+		     enum cachefiles_coherency_trace why),
+
+	    TP_ARGS(obj, ino, content, why),
+
+	    /* Note that obj may be NULL */
+	    TP_STRUCT__entry(
+		    __field(unsigned int,			obj	)
+		    __field(enum cachefiles_coherency_trace,	why	)
+		    __field(enum cachefiles_content,		content	)
+		    __field(u64,				ino	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj->debug_id;
+		    __entry->why	= why;
+		    __entry->content	= content;
+		    __entry->ino	= ino;
+			   ),
+
+	    TP_printk("o=%08x %s i=%llx c=%u",
+		      __entry->obj,
+		      __print_symbolic(__entry->why, cachefiles_coherency_traces),
+		      __entry->ino,
+		      __entry->content)
+	    );
+
 TRACE_EVENT(cachefiles_trunc,
 	    TP_PROTO(struct cachefiles_object *obj, struct inode *backer,
 		     loff_t from, loff_t to, enum cachefiles_trunc_trace why),
-- 
cgit v1.2.3


From 287fd611238dd4b7e32fd3a8985aa387d26c4f29 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 21 Oct 2021 11:05:53 +0100
Subject: cachefiles: Implement begin and end I/O operation

Implement the methods for beginning and ending an I/O operation.

When called to begin an I/O operation, we are guaranteed that the cookie
has reached a certain stage (we're called by fscache after it has done a
suitable wait).

If a file is available, we paste a ref over into the cache resources for
the I/O routines to use.  This means that the object can be invalidated
whilst the I/O is ongoing without the need to synchronise as the file
pointer in the object is replaced, but the file pointer in the cache
resources is unaffected.

Ending the operation just requires ditching any refs we have and dropping
the access guarantee that fscache got for us on the cookie.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819645033.215744.2199344081658268312.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906951916.143852.9531384743995679857.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967161222.1823006.4461476204800357263.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021559030.640689.3684291785218094142.stgit@warthog.procyon.org.uk/ # v4
---
 include/trace/events/fscache.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index d9d830296ec3..1594aefadeac 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -78,6 +78,7 @@ enum fscache_access_trace {
 	fscache_access_cache_unpin,
 	fscache_access_invalidate_cookie,
 	fscache_access_invalidate_cookie_end,
+	fscache_access_io_end,
 	fscache_access_io_not_live,
 	fscache_access_io_read,
 	fscache_access_io_resize,
@@ -152,6 +153,7 @@ enum fscache_access_trace {
 	EM(fscache_access_cache_unpin,		"UNPIN cache  ")	\
 	EM(fscache_access_invalidate_cookie,	"BEGIN inval  ")	\
 	EM(fscache_access_invalidate_cookie_end,"END   inval  ")	\
+	EM(fscache_access_io_end,		"END   io     ")	\
 	EM(fscache_access_io_not_live,		"END   io_notl")	\
 	EM(fscache_access_io_read,		"BEGIN io_read")	\
 	EM(fscache_access_io_resize,		"BEGIN io_resz")	\
-- 
cgit v1.2.3


From 047487c947e8b96b94579c3a33207bd4e266b4c6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 21 Oct 2021 00:19:46 +0100
Subject: cachefiles: Implement the I/O routines

Implement the I/O routines for cachefiles.  There are two sets of routines
here: preparation and actual I/O.

Preparation for read involves looking to see whether there is data present,
and how much.  Netfslib tells us what it wants us to do and we have the
option of adjusting shrinking and telling it whether to read from the
cache, download from the server or simply clear a region.

Preparation for write involves checking for space and defending against
possibly running short of space, if necessary punching out a hole in the
file so that we don't leave old data in the cache if we update the
coherency information.

Then there's a read routine and a write routine.  They wait for the cookie
state to move to something appropriate and then start a potentially
asynchronous direct I/O operation upon it.

Changes
=======
ver #2:
 - Fix a misassigned variable[1].

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/YaZOCk9zxApPattb@archlinux-ax161/ [1]
Link: https://lore.kernel.org/r/163819647945.215744.17827962047487125939.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906954666.143852.1504887120569779407.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967163110.1823006.9206718511874339672.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021562168.640689.8802250542405732391.stgit@warthog.procyon.org.uk/ # v4
---
 include/trace/events/cachefiles.h | 121 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index 98b1eee4a7a8..ab1376ebc3ab 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -61,6 +61,17 @@ enum cachefiles_trunc_trace {
 	cachefiles_trunc_shrink,
 };
 
+enum cachefiles_prepare_read_trace {
+	cachefiles_trace_read_after_eof,
+	cachefiles_trace_read_found_hole,
+	cachefiles_trace_read_found_part,
+	cachefiles_trace_read_have_data,
+	cachefiles_trace_read_no_data,
+	cachefiles_trace_read_no_file,
+	cachefiles_trace_read_seek_error,
+	cachefiles_trace_read_seek_nxio,
+};
+
 enum cachefiles_error_trace {
 	cachefiles_trace_fallocate_error,
 	cachefiles_trace_getxattr_error,
@@ -125,6 +136,16 @@ enum cachefiles_error_trace {
 	EM(cachefiles_trunc_expand_tmpfile,	"EXPTMP")		\
 	E_(cachefiles_trunc_shrink,		"SHRINK")
 
+#define cachefiles_prepare_read_traces					\
+	EM(cachefiles_trace_read_after_eof,	"after-eof ")		\
+	EM(cachefiles_trace_read_found_hole,	"found-hole")		\
+	EM(cachefiles_trace_read_found_part,	"found-part")		\
+	EM(cachefiles_trace_read_have_data,	"have-data ")		\
+	EM(cachefiles_trace_read_no_data,	"no-data   ")		\
+	EM(cachefiles_trace_read_no_file,	"no-file   ")		\
+	EM(cachefiles_trace_read_seek_error,	"seek-error")		\
+	E_(cachefiles_trace_read_seek_nxio,	"seek-enxio")
+
 #define cachefiles_error_traces						\
 	EM(cachefiles_trace_fallocate_error,	"fallocate")		\
 	EM(cachefiles_trace_getxattr_error,	"getxattr")		\
@@ -157,6 +178,7 @@ cachefiles_obj_kill_traces;
 cachefiles_obj_ref_traces;
 cachefiles_coherency_traces;
 cachefiles_trunc_traces;
+cachefiles_prepare_read_traces;
 cachefiles_error_traces;
 
 /*
@@ -343,6 +365,105 @@ TRACE_EVENT(cachefiles_coherency,
 		      __entry->content)
 	    );
 
+TRACE_EVENT(cachefiles_prep_read,
+	    TP_PROTO(struct netfs_read_subrequest *sreq,
+		     enum netfs_read_source source,
+		     enum cachefiles_prepare_read_trace why,
+		     ino_t cache_inode),
+
+	    TP_ARGS(sreq, source, why, cache_inode),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		rreq		)
+		    __field(unsigned short,		index		)
+		    __field(unsigned short,		flags		)
+		    __field(enum netfs_read_source,	source		)
+		    __field(enum cachefiles_prepare_read_trace,	why	)
+		    __field(size_t,			len		)
+		    __field(loff_t,			start		)
+		    __field(unsigned int,		netfs_inode	)
+		    __field(unsigned int,		cache_inode	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->rreq	= sreq->rreq->debug_id;
+		    __entry->index	= sreq->debug_index;
+		    __entry->flags	= sreq->flags;
+		    __entry->source	= source;
+		    __entry->why	= why;
+		    __entry->len	= sreq->len;
+		    __entry->start	= sreq->start;
+		    __entry->netfs_inode = sreq->rreq->inode->i_ino;
+		    __entry->cache_inode = cache_inode;
+			   ),
+
+	    TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx ni=%x b=%x",
+		      __entry->rreq, __entry->index,
+		      __print_symbolic(__entry->source, netfs_sreq_sources),
+		      __print_symbolic(__entry->why, cachefiles_prepare_read_traces),
+		      __entry->flags,
+		      __entry->start, __entry->len,
+		      __entry->netfs_inode, __entry->cache_inode)
+	    );
+
+TRACE_EVENT(cachefiles_read,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct inode *backer,
+		     loff_t start,
+		     size_t len),
+
+	    TP_ARGS(obj, backer, start, len),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,			obj	)
+		    __field(unsigned int,			backer	)
+		    __field(size_t,				len	)
+		    __field(loff_t,				start	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj->debug_id;
+		    __entry->backer	= backer->i_ino;
+		    __entry->start	= start;
+		    __entry->len	= len;
+			   ),
+
+	    TP_printk("o=%08x b=%08x s=%llx l=%zx",
+		      __entry->obj,
+		      __entry->backer,
+		      __entry->start,
+		      __entry->len)
+	    );
+
+TRACE_EVENT(cachefiles_write,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct inode *backer,
+		     loff_t start,
+		     size_t len),
+
+	    TP_ARGS(obj, backer, start, len),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,			obj	)
+		    __field(unsigned int,			backer	)
+		    __field(size_t,				len	)
+		    __field(loff_t,				start	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj->debug_id;
+		    __entry->backer	= backer->i_ino;
+		    __entry->start	= start;
+		    __entry->len	= len;
+			   ),
+
+	    TP_printk("o=%08x b=%08x s=%llx l=%zx",
+		      __entry->obj,
+		      __entry->backer,
+		      __entry->start,
+		      __entry->len)
+	    );
+
 TRACE_EVENT(cachefiles_trunc,
 	    TP_PROTO(struct cachefiles_object *obj, struct inode *backer,
 		     loff_t from, loff_t to, enum cachefiles_trunc_trace why),
-- 
cgit v1.2.3


From 32e150037dce368d129996ffe5f98217b1974d9e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 14 Dec 2021 09:51:43 +0000
Subject: fscache, cachefiles: Store the volume coherency data

Store the volume coherency data in an xattr and check it when we rebind the
volume.  If it doesn't match the cache volume is moved to the graveyard and
rebuilt anew.

Changes
=======
ver #4:
 - Remove a couple of debugging prints.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/163967164397.1823006.2950539849831291830.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021563138.640689.15851092065380543119.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache.h           |  2 ++
 include/trace/events/cachefiles.h | 42 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 42 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 86b1c0db1de5..7bd35f60d19a 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -87,6 +87,8 @@ struct fscache_volume {
 #define FSCACHE_VOLUME_COLLIDED_WITH	2	/* Volume was collided with */
 #define FSCACHE_VOLUME_ACQUIRE_PENDING	3	/* Volume is waiting to complete acquisition */
 #define FSCACHE_VOLUME_CREATING		4	/* Volume is being created on disk */
+	u8				coherency_len;	/* Length of the coherency data */
+	u8				coherency[];	/* Coherency data */
 };
 
 /*
diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index ab1376ebc3ab..1172529b5b49 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -40,6 +40,7 @@ enum fscache_why_object_killed {
 	FSCACHE_OBJECT_NO_SPACE,
 	FSCACHE_OBJECT_WAS_RETIRED,
 	FSCACHE_OBJECT_WAS_CULLED,
+	FSCACHE_VOLUME_IS_WEIRD,
 };
 
 enum cachefiles_coherency_trace {
@@ -53,6 +54,11 @@ enum cachefiles_coherency_trace {
 	cachefiles_coherency_check_xattr,
 	cachefiles_coherency_set_fail,
 	cachefiles_coherency_set_ok,
+	cachefiles_coherency_vol_check_cmp,
+	cachefiles_coherency_vol_check_ok,
+	cachefiles_coherency_vol_check_xattr,
+	cachefiles_coherency_vol_set_fail,
+	cachefiles_coherency_vol_set_ok,
 };
 
 enum cachefiles_trunc_trace {
@@ -103,7 +109,8 @@ enum cachefiles_error_trace {
 	EM(FSCACHE_OBJECT_INVALIDATED,	"inval")		\
 	EM(FSCACHE_OBJECT_NO_SPACE,	"no_space")		\
 	EM(FSCACHE_OBJECT_WAS_RETIRED,	"was_retired")		\
-	E_(FSCACHE_OBJECT_WAS_CULLED,	"was_culled")
+	EM(FSCACHE_OBJECT_WAS_CULLED,	"was_culled")		\
+	E_(FSCACHE_VOLUME_IS_WEIRD,	"volume_weird")
 
 #define cachefiles_obj_ref_traces					\
 	EM(cachefiles_obj_get_ioreq,		"GET ioreq")		\
@@ -129,7 +136,12 @@ enum cachefiles_error_trace {
 	EM(cachefiles_coherency_check_type,	"BAD type")		\
 	EM(cachefiles_coherency_check_xattr,	"BAD xatt")		\
 	EM(cachefiles_coherency_set_fail,	"SET fail")		\
-	E_(cachefiles_coherency_set_ok,		"SET ok  ")
+	EM(cachefiles_coherency_set_ok,		"SET ok  ")		\
+	EM(cachefiles_coherency_vol_check_cmp,	"VOL BAD cmp ")		\
+	EM(cachefiles_coherency_vol_check_ok,	"VOL OK      ")		\
+	EM(cachefiles_coherency_vol_check_xattr,"VOL BAD xatt")		\
+	EM(cachefiles_coherency_vol_set_fail,	"VOL SET fail")		\
+	E_(cachefiles_coherency_vol_set_ok,	"VOL SET ok  ")
 
 #define cachefiles_trunc_traces						\
 	EM(cachefiles_trunc_dio_adjust,		"DIOADJ")		\
@@ -365,6 +377,32 @@ TRACE_EVENT(cachefiles_coherency,
 		      __entry->content)
 	    );
 
+TRACE_EVENT(cachefiles_vol_coherency,
+	    TP_PROTO(struct cachefiles_volume *volume,
+		     ino_t ino,
+		     enum cachefiles_coherency_trace why),
+
+	    TP_ARGS(volume, ino, why),
+
+	    /* Note that obj may be NULL */
+	    TP_STRUCT__entry(
+		    __field(unsigned int,			vol	)
+		    __field(enum cachefiles_coherency_trace,	why	)
+		    __field(u64,				ino	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->vol	= volume->vcookie->debug_id;
+		    __entry->why	= why;
+		    __entry->ino	= ino;
+			   ),
+
+	    TP_printk("V=%08x %s i=%llx",
+		      __entry->vol,
+		      __print_symbolic(__entry->why, cachefiles_coherency_traces),
+		      __entry->ino)
+	    );
+
 TRACE_EVENT(cachefiles_prep_read,
 	    TP_PROTO(struct netfs_read_subrequest *sreq,
 		     enum netfs_read_source source,
-- 
cgit v1.2.3


From 3929eca769b5a231010b4978acc61c0735da198f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 21 Oct 2021 21:58:29 +0100
Subject: fscache, cachefiles: Display stats of no-space events

Add stat counters of no-space events that caused caching not to happen and
display in /proc/fs/fscache/stats.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819653216.215744.17210522251617386509.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906958369.143852.7257100711818401748.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967166917.1823006.14842444049198947892.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021566184.640689.4417328329632709265.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache-cache.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 3fa4902dc87c..007e47f38610 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -186,11 +186,17 @@ static inline void fscache_wait_for_objects(struct fscache_cache *cache)
 #ifdef CONFIG_FSCACHE_STATS
 extern atomic_t fscache_n_read;
 extern atomic_t fscache_n_write;
+extern atomic_t fscache_n_no_write_space;
+extern atomic_t fscache_n_no_create_space;
 #define fscache_count_read() atomic_inc(&fscache_n_read)
 #define fscache_count_write() atomic_inc(&fscache_n_write)
+#define fscache_count_no_write_space() atomic_inc(&fscache_n_no_write_space)
+#define fscache_count_no_create_space() atomic_inc(&fscache_n_no_create_space)
 #else
 #define fscache_count_read() do {} while(0)
 #define fscache_count_write() do {} while(0)
+#define fscache_count_no_write_space() do {} while(0)
+#define fscache_count_no_create_space() do {} while(0)
 #endif
 
 #endif /* _LINUX_FSCACHE_CACHE_H */
-- 
cgit v1.2.3


From 9f08ebc3438baaaefcc79654b330209b83397f17 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 22 Oct 2021 09:17:58 +0100
Subject: fscache, cachefiles: Display stat of culling events

Add a stat counter of culling events whereby the cache backend culls a file
to make space (when asked by cachefilesd in this case) and display in
/proc/fs/fscache/stats.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/163819654165.215744.3797804661644212436.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906961387.143852.9291157239960289090.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967168266.1823006.14436200166581605746.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021567619.640689.4339228906248763197.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache-cache.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 007e47f38610..a174cedf4d90 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -188,15 +188,18 @@ extern atomic_t fscache_n_read;
 extern atomic_t fscache_n_write;
 extern atomic_t fscache_n_no_write_space;
 extern atomic_t fscache_n_no_create_space;
+extern atomic_t fscache_n_culled;
 #define fscache_count_read() atomic_inc(&fscache_n_read)
 #define fscache_count_write() atomic_inc(&fscache_n_write)
 #define fscache_count_no_write_space() atomic_inc(&fscache_n_no_write_space)
 #define fscache_count_no_create_space() atomic_inc(&fscache_n_no_create_space)
+#define fscache_count_culled() atomic_inc(&fscache_n_culled)
 #else
 #define fscache_count_read() do {} while(0)
 #define fscache_count_write() do {} while(0)
 #define fscache_count_no_write_space() do {} while(0)
 #define fscache_count_no_create_space() do {} while(0)
+#define fscache_count_culled() do {} while(0)
 #endif
 
 #endif /* _LINUX_FSCACHE_CACHE_H */
-- 
cgit v1.2.3


From 2efd61a608b0039911924d2e5d7028eb37496e85 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Fri, 10 Dec 2021 16:36:20 +0000
Subject: KVM: Warn if mark_page_dirty() is called without an active vCPU

The various kvm_write_guest() and mark_page_dirty() functions must only
ever be called in the context of an active vCPU, because if dirty ring
tracking is enabled it may simply oops when kvm_get_running_vcpu()
returns NULL for the vcpu and then kvm_dirty_ring_get() dereferences it.

This oops was reported by "butt3rflyh4ck" <butterflyhuangxx@gmail.com> in
https://lore.kernel.org/kvm/CAFcO6XOmoS7EacN_n6v4Txk7xL7iqRa2gABg3F7E3Naf5uG94g@mail.gmail.com/

That actual bug will be fixed under separate cover but this warning
should help to prevent new ones from being added.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20211210163625.2886-2-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_dirty_ring.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
index 4da8d4a4140b..906f899813dc 100644
--- a/include/linux/kvm_dirty_ring.h
+++ b/include/linux/kvm_dirty_ring.h
@@ -43,11 +43,6 @@ static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring,
 	return 0;
 }
 
-static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm)
-{
-	return NULL;
-}
-
 static inline int kvm_dirty_ring_reset(struct kvm *kvm,
 				       struct kvm_dirty_ring *ring)
 {
@@ -78,7 +73,6 @@ static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
 
 u32 kvm_dirty_ring_get_rsvd_entries(void);
 int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size);
-struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm);
 
 /*
  * called with kvm->slots_lock held, returns the number of
-- 
cgit v1.2.3


From 982ed0de4753ed6e71dbd40f82a5a066baf133ed Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Fri, 10 Dec 2021 16:36:21 +0000
Subject: KVM: Reinstate gfn_to_pfn_cache with invalidation support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This can be used in two modes. There is an atomic mode where the cached
mapping is accessed while holding the rwlock, and a mode where the
physical address is used by a vCPU in guest mode.

For the latter case, an invalidation will wake the vCPU with the new
KVM_REQ_GPC_INVALIDATE, and the architecture will need to refresh any
caches it still needs to access before entering guest mode again.

Only one vCPU can be targeted by the wake requests; it's simple enough
to make it wake all vCPUs or even a mask but I don't see a use case for
that additional complexity right now.

Invalidation happens from the invalidate_range_start MMU notifier, which
needs to be able to sleep in order to wake the vCPU and wait for it.

This means that revalidation potentially needs to "wait" for the MMU
operation to complete and the invalidate_range_end notifier to be
invoked. Like the vCPU when it takes a page fault in that period, we
just spin — fixing that in a future patch by implementing an actual
*wait* may be another part of shaving this particularly hirsute yak.

As noted in the comments in the function itself, the only case where
the invalidate_range_start notifier is expected to be called *without*
being able to sleep is when the OOM reaper is killing the process. In
that case, we expect the vCPU threads already to have exited, and thus
there will be nothing to wake, and no reason to wait. So we clear the
KVM_REQUEST_WAIT bit and send the request anyway, then complain loudly
if there actually *was* anything to wake up.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20211210163625.2886-3-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h  | 103 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/kvm_types.h |  18 ++++++++
 2 files changed, 121 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f9bbcf519280..9bbb1f1d9e48 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -155,6 +155,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_UNBLOCK           2
 #define KVM_REQ_UNHALT            3
 #define KVM_REQ_VM_DEAD           (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_GPC_INVALIDATE    (5 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQUEST_ARCH_BASE     8
 
 #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
@@ -593,6 +594,10 @@ struct kvm {
 	unsigned long mn_active_invalidate_count;
 	struct rcuwait mn_memslots_update_rcuwait;
 
+	/* For management / invalidation of gfn_to_pfn_caches */
+	spinlock_t gpc_lock;
+	struct list_head gpc_list;
+
 	/*
 	 * created_vcpus is protected by kvm->lock, and is incremented
 	 * at the beginning of KVM_CREATE_VCPU.  online_vcpus is only
@@ -1099,6 +1104,104 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
 			 unsigned long len);
 void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
 
+/**
+ * kvm_gfn_to_pfn_cache_init - prepare a cached kernel mapping and HPA for a
+ *                             given guest physical address.
+ *
+ * @kvm:	   pointer to kvm instance.
+ * @gpc:	   struct gfn_to_pfn_cache object.
+ * @vcpu:	   vCPU to be used for marking pages dirty and to be woken on
+ *		   invalidation.
+ * @guest_uses_pa: indicates that the resulting host physical PFN is used while
+ *		   @vcpu is IN_GUEST_MODE so invalidations should wake it.
+ * @kernel_map:    requests a kernel virtual mapping (kmap / memremap).
+ * @gpa:	   guest physical address to map.
+ * @len:	   sanity check; the range being access must fit a single page.
+ * @dirty:         mark the cache dirty immediately.
+ *
+ * @return:	   0 for success.
+ *		   -EINVAL for a mapping which would cross a page boundary.
+ *                 -EFAULT for an untranslatable guest physical address.
+ *
+ * This primes a gfn_to_pfn_cache and links it into the @kvm's list for
+ * invalidations to be processed. Invalidation callbacks to @vcpu using
+ * %KVM_REQ_GPC_INVALIDATE will occur only for MMU notifiers, not for KVM
+ * memslot changes. Callers are required to use kvm_gfn_to_pfn_cache_check()
+ * to ensure that the cache is valid before accessing the target page.
+ */
+int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+			      struct kvm_vcpu *vcpu, bool guest_uses_pa,
+			      bool kernel_map, gpa_t gpa, unsigned long len,
+			      bool dirty);
+
+/**
+ * kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache.
+ *
+ * @kvm:	   pointer to kvm instance.
+ * @gpc:	   struct gfn_to_pfn_cache object.
+ * @gpa:	   current guest physical address to map.
+ * @len:	   sanity check; the range being access must fit a single page.
+ * @dirty:         mark the cache dirty immediately.
+ *
+ * @return:	   %true if the cache is still valid and the address matches.
+ *		   %false if the cache is not valid.
+ *
+ * Callers outside IN_GUEST_MODE context should hold a read lock on @gpc->lock
+ * while calling this function, and then continue to hold the lock until the
+ * access is complete.
+ *
+ * Callers in IN_GUEST_MODE may do so without locking, although they should
+ * still hold a read lock on kvm->scru for the memslot checks.
+ */
+bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+				gpa_t gpa, unsigned long len);
+
+/**
+ * kvm_gfn_to_pfn_cache_refresh - update a previously initialized cache.
+ *
+ * @kvm:	   pointer to kvm instance.
+ * @gpc:	   struct gfn_to_pfn_cache object.
+ * @gpa:	   updated guest physical address to map.
+ * @len:	   sanity check; the range being access must fit a single page.
+ * @dirty:         mark the cache dirty immediately.
+ *
+ * @return:	   0 for success.
+ *		   -EINVAL for a mapping which would cross a page boundary.
+ *                 -EFAULT for an untranslatable guest physical address.
+ *
+ * This will attempt to refresh a gfn_to_pfn_cache. Note that a successful
+ * returm from this function does not mean the page can be immediately
+ * accessed because it may have raced with an invalidation. Callers must
+ * still lock and check the cache status, as this function does not return
+ * with the lock still held to permit access.
+ */
+int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+				 gpa_t gpa, unsigned long len, bool dirty);
+
+/**
+ * kvm_gfn_to_pfn_cache_unmap - temporarily unmap a gfn_to_pfn_cache.
+ *
+ * @kvm:	   pointer to kvm instance.
+ * @gpc:	   struct gfn_to_pfn_cache object.
+ *
+ * This unmaps the referenced page and marks it dirty, if appropriate. The
+ * cache is left in the invalid state but at least the mapping from GPA to
+ * userspace HVA will remain cached and can be reused on a subsequent
+ * refresh.
+ */
+void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
+
+/**
+ * kvm_gfn_to_pfn_cache_destroy - destroy and unlink a gfn_to_pfn_cache.
+ *
+ * @kvm:	   pointer to kvm instance.
+ * @gpc:	   struct gfn_to_pfn_cache object.
+ *
+ * This removes a cache from the @kvm's list to be processed on MMU notifier
+ * invocation.
+ */
+void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
+
 void kvm_sigset_activate(struct kvm_vcpu *vcpu);
 void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);
 
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 888ef12862c9..dceac12c1ce5 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -19,6 +19,7 @@ struct kvm_memslots;
 enum kvm_mr_change;
 
 #include <linux/types.h>
+#include <linux/spinlock_types.h>
 
 #include <asm/kvm_types.h>
 
@@ -53,6 +54,23 @@ struct gfn_to_hva_cache {
 	struct kvm_memory_slot *memslot;
 };
 
+struct gfn_to_pfn_cache {
+	u64 generation;
+	gpa_t gpa;
+	unsigned long uhva;
+	struct kvm_memory_slot *memslot;
+	struct kvm_vcpu *vcpu;
+	struct list_head list;
+	rwlock_t lock;
+	void *khva;
+	kvm_pfn_t pfn;
+	bool active;
+	bool valid;
+	bool dirty;
+	bool kernel_map;
+	bool guest_uses_pa;
+};
+
 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
 /*
  * Memory caches are used to preallocate memory ahead of various MMU flows,
-- 
cgit v1.2.3


From 14243b387137a4afbe1df5d9dc15182d6657bb79 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Fri, 10 Dec 2021 16:36:23 +0000
Subject: KVM: x86/xen: Add KVM_IRQ_ROUTING_XEN_EVTCHN and event channel
 delivery

This adds basic support for delivering 2 level event channels to a guest.

Initially, it only supports delivery via the IRQ routing table, triggered
by an eventfd. In order to do so, it has a kvm_xen_set_evtchn_fast()
function which will use the pre-mapped shared_info page if it already
exists and is still valid, while the slow path through the irqfd_inject
workqueue will remap the shared_info page if necessary.

It sets the bits in the shared_info page but not the vcpu_info; that is
deferred to __kvm_xen_has_interrupt() which raises the vector to the
appropriate vCPU.

Add a 'verbose' mode to xen_shinfo_test while adding test cases for this.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20211210163625.2886-5-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h |  7 +++++++
 include/uapi/linux/kvm.h | 11 +++++++++++
 2 files changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9bbb1f1d9e48..3c47b146851a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -497,6 +497,12 @@ struct kvm_hv_sint {
 	u32 sint;
 };
 
+struct kvm_xen_evtchn {
+	u32 port;
+	u32 vcpu;
+	u32 priority;
+};
+
 struct kvm_kernel_irq_routing_entry {
 	u32 gsi;
 	u32 type;
@@ -517,6 +523,7 @@ struct kvm_kernel_irq_routing_entry {
 		} msi;
 		struct kvm_s390_adapter_int adapter;
 		struct kvm_hv_sint hv_sint;
+		struct kvm_xen_evtchn xen_evtchn;
 	};
 	struct hlist_node link;
 };
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 469f05d69c8d..fbfd70d965c6 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1163,11 +1163,20 @@ struct kvm_irq_routing_hv_sint {
 	__u32 sint;
 };
 
+struct kvm_irq_routing_xen_evtchn {
+	__u32 port;
+	__u32 vcpu;
+	__u32 priority;
+};
+
+#define KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL ((__u32)(-1))
+
 /* gsi routing entry types */
 #define KVM_IRQ_ROUTING_IRQCHIP 1
 #define KVM_IRQ_ROUTING_MSI 2
 #define KVM_IRQ_ROUTING_S390_ADAPTER 3
 #define KVM_IRQ_ROUTING_HV_SINT 4
+#define KVM_IRQ_ROUTING_XEN_EVTCHN 5
 
 struct kvm_irq_routing_entry {
 	__u32 gsi;
@@ -1179,6 +1188,7 @@ struct kvm_irq_routing_entry {
 		struct kvm_irq_routing_msi msi;
 		struct kvm_irq_routing_s390_adapter adapter;
 		struct kvm_irq_routing_hv_sint hv_sint;
+		struct kvm_irq_routing_xen_evtchn xen_evtchn;
 		__u32 pad[8];
 	} u;
 };
@@ -1209,6 +1219,7 @@ struct kvm_x86_mce {
 #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL	(1 << 1)
 #define KVM_XEN_HVM_CONFIG_SHARED_INFO		(1 << 2)
 #define KVM_XEN_HVM_CONFIG_RUNSTATE		(1 << 3)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL	(1 << 4)
 
 struct kvm_xen_hvm_config {
 	__u32 flags;
-- 
cgit v1.2.3


From d92321bbe46b0ecae0941461379d39599610d869 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Fri, 7 Jan 2022 16:06:35 +0000
Subject: ASoC: cs35l41: Update handling of test key registers

In preparation for the addition of PM runtime support move the test
key out of the register patches themselves. This is necessary to
allow the test key to be held during cache synchronisation, which is
required by the OTP settings which were unpacked from the device and
written by the driver.

Also whilst at it, the driver uses a mixture of accessing the test key
register by name and by address, consistently use the name.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20220107160636.6555-2-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/cs35l41.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h
index 29a527457b48..56289b67b9a0 100644
--- a/include/sound/cs35l41.h
+++ b/include/sound/cs35l41.h
@@ -762,6 +762,8 @@ struct cs35l41_otp_map_element_t {
 extern struct regmap_config cs35l41_regmap_i2c;
 extern struct regmap_config cs35l41_regmap_spi;
 
+int cs35l41_test_key_unlock(struct device *dev, struct regmap *regmap);
+int cs35l41_test_key_lock(struct device *dev, struct regmap *regmap);
 int cs35l41_otp_unpack(struct device *dev, struct regmap *regmap);
 int cs35l41_register_errata_patch(struct device *dev, struct regmap *reg, unsigned int reg_revid);
 int cs35l41_set_channels(struct device *dev, struct regmap *reg,
-- 
cgit v1.2.3


From f517ba4924ad026f2583553db02f3c8bc69de88b Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Fri, 7 Jan 2022 16:06:36 +0000
Subject: ASoC: cs35l41: Add support for hibernate memory retention mode

The cs35l41 supports a low power DSP memory retention mode. Add support
for entering this mode when then device is not in use.

Co-authored-by: David Rhodes <david.rhodes@cirrus.com>
Signed-off-by: David Rhodes <david.rhodes@cirrus.com>
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20220107160636.6555-3-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/cs35l41.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h
index 56289b67b9a0..bf7f9a9aeba0 100644
--- a/include/sound/cs35l41.h
+++ b/include/sound/cs35l41.h
@@ -40,6 +40,9 @@
 #define CS35L41_PROTECT_REL_ERR_IGN	0x00002034
 #define CS35L41_GPIO_PAD_CONTROL	0x0000242C
 #define CS35L41_JTAG_CONTROL		0x00002438
+#define CS35L41_PWRMGT_CTL		0x00002900
+#define CS35L41_WAKESRC_CTL		0x00002904
+#define CS35L41_PWRMGT_STS		0x00002908
 #define CS35L41_PLL_CLK_CTRL		0x00002C04
 #define CS35L41_DSP_CLK_CTRL		0x00002C08
 #define CS35L41_GLOBAL_CLK_CTRL		0x00002C0C
@@ -635,6 +638,8 @@
 #define CS35L41_INPUT_DSP_TX1		0x32
 #define CS35L41_INPUT_DSP_TX2		0x33
 
+#define CS35L41_WR_PEND_STS_MASK	0x2
+
 #define CS35L41_PLL_CLK_SEL_MASK	0x07
 #define CS35L41_PLL_CLK_SEL_SHIFT	0
 #define CS35L41_PLL_CLK_EN_MASK		0x10
-- 
cgit v1.2.3


From 44ea62813f0ab3d718de480504f4dfd0bdd01858 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 3 Sep 2021 18:31:40 -0700
Subject: spi: don't include ptp_clock_kernel.h in spi.h

Commit b42faeee718c ("spi: Add a PTP system timestamp
to the transfer structure") added an include of ptp_clock_kernel.h
to spi.h for struct ptp_system_timestamp but a forward declaration
is enough. Let's use that to limit the number of objects we have
to rebuild every time we touch networking headers.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Tested-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20210904013140.2377609-1-kuba@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index eb7ac8a1e03c..7ab3fed7b804 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -14,12 +14,12 @@
 #include <linux/completion.h>
 #include <linux/scatterlist.h>
 #include <linux/gpio/consumer.h>
-#include <linux/ptp_clock_kernel.h>
 
 #include <uapi/linux/spi/spi.h>
 
 struct dma_chan;
 struct software_node;
+struct ptp_system_timestamp;
 struct spi_controller;
 struct spi_transfer;
 struct spi_controller_mem_ops;
-- 
cgit v1.2.3


From 18451db82ef7f943c60a7fce685f16172bda5106 Mon Sep 17 00:00:00 2001
From: Zhu Yanjun <yanjun.zhu@linux.dev>
Date: Thu, 6 Jan 2022 13:03:56 -0500
Subject: RDMA/core: Calculate UDP source port based on flow label or lqpn/rqpn

Calculate and set UDP source port based on the flow label. If flow label
is not defined in GRH then calculate it based on lqpn/rqpn.

Link: https://lore.kernel.org/r/20220106180359.2915060-2-yanjun.zhu@linux.dev
Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/rdma/ib_verbs.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 6e9ad656ecb7..69d883f7fb41 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4749,6 +4749,23 @@ static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn)
 	return (u32)(v & IB_GRH_FLOWLABEL_MASK);
 }
 
+/**
+ * rdma_get_udp_sport - Calculate and set UDP source port based on the flow
+ *                      label. If flow label is not defined in GRH then
+ *                      calculate it based on lqpn/rqpn.
+ *
+ * @fl:                 flow label from GRH
+ * @lqpn:               local qp number
+ * @rqpn:               remote qp number
+ */
+static inline u16 rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn)
+{
+	if (!fl)
+		fl = rdma_calc_flow_label(lqpn, rqpn);
+
+	return rdma_flow_label_to_udp_sport(fl);
+}
+
 const struct ib_port_immutable*
 ib_port_immutable_read(struct ib_device *dev, unsigned int port);
 #endif /* IB_VERBS_H */
-- 
cgit v1.2.3


From 5cad43a52ee3caf451cd645baa4beb53a1733dae Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 7 Jan 2022 16:42:29 +0200
Subject: net: dsa: felix: add port fast age support

Add support for flushing the MAC table on a given port in the ocelot
switch library, and use this functionality in the felix DSA driver.

This operation is needed when a port leaves a bridge to become
standalone, and when the learning is disabled, and when the STP state
changes to a state where no FDB entry should be present.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20220107144229.244584-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/soc/mscc/ocelot.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 3e9454b00562..5c3a3597f1d2 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -833,6 +833,7 @@ void ocelot_port_bridge_join(struct ocelot *ocelot, int port,
 			     struct net_device *bridge);
 void ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
 			      struct net_device *bridge);
+int ocelot_mact_flush(struct ocelot *ocelot, int port);
 int ocelot_fdb_dump(struct ocelot *ocelot, int port,
 		    dsa_fdb_dump_cb_t *cb, void *data);
 int ocelot_fdb_add(struct ocelot *ocelot, int port,
-- 
cgit v1.2.3


From 3506659e18a61ae525f3b9b4f5af23b4b149d4db Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 28 Nov 2021 14:53:35 -0500
Subject: mm: Add unmap_mapping_folio()

Convert both callers of unmap_mapping_page() to call unmap_mapping_folio()
instead.  Also move zap_details from linux/mm.h to mm/memory.c

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/mm.h | 24 ------------------------
 1 file changed, 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 145f045b0ddc..c9cdb26802fb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1825,28 +1825,6 @@ static inline bool can_do_mlock(void) { return false; }
 extern int user_shm_lock(size_t, struct ucounts *);
 extern void user_shm_unlock(size_t, struct ucounts *);
 
-/*
- * Parameter block passed down to zap_pte_range in exceptional cases.
- */
-struct zap_details {
-	struct address_space *zap_mapping;	/* Check page->mapping if set */
-	struct page *single_page;		/* Locked page to be unmapped */
-};
-
-/*
- * We set details->zap_mappings when we want to unmap shared but keep private
- * pages. Return true if skip zapping this page, false otherwise.
- */
-static inline bool
-zap_skip_check_mapping(struct zap_details *details, struct page *page)
-{
-	if (!details || !page)
-		return false;
-
-	return details->zap_mapping &&
-	    (details->zap_mapping != page_rmapping(page));
-}
-
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 			     pte_t pte);
 struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
@@ -1892,7 +1870,6 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
 extern int fixup_user_fault(struct mm_struct *mm,
 			    unsigned long address, unsigned int fault_flags,
 			    bool *unlocked);
-void unmap_mapping_page(struct page *page);
 void unmap_mapping_pages(struct address_space *mapping,
 		pgoff_t start, pgoff_t nr, bool even_cows);
 void unmap_mapping_range(struct address_space *mapping,
@@ -1913,7 +1890,6 @@ static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address,
 	BUG();
 	return -EFAULT;
 }
-static inline void unmap_mapping_page(struct page *page) { }
 static inline void unmap_mapping_pages(struct address_space *mapping,
 		pgoff_t start, pgoff_t nr, bool even_cows) { }
 static inline void unmap_mapping_range(struct address_space *mapping,
-- 
cgit v1.2.3


From 1e84a3d997b74c33491899e31d48774f252213ab Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 2 Dec 2021 16:01:55 -0500
Subject: truncate,shmem: Add truncate_inode_folio()

Convert all callers of truncate_inode_page() to call
truncate_inode_folio() instead, and move the declaration to mm/internal.h.
Move the assertion that the caller is not passing in a tail page to
generic_error_remove_page().  We can't entirely remove the struct page
from the callers yet because the page pointer in the pvec might be a
shadow/dax/swap entry instead of actually a page.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/mm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c9cdb26802fb..d8b7d7ed14dd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1859,7 +1859,6 @@ extern void truncate_pagecache(struct inode *inode, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
 void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
 void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
-int truncate_inode_page(struct address_space *mapping, struct page *page);
 int generic_error_remove_page(struct address_space *mapping, struct page *page);
 int invalidate_inode_page(struct page *page);
 
-- 
cgit v1.2.3


From 0e499ed3d7a216706e02eeded562627d3e69dcfd Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 1 Sep 2020 23:17:50 -0400
Subject: filemap: Return only folios from find_get_entries()

The callers have all been converted to work on folios, so convert
find_get_entries() to return a batch of folios instead of pages.
We also now return multiple large folios in a single call.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/pagemap.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index eb6e58e106c8..d2259a1da51c 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -592,8 +592,6 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index)
 	return head + (index & (thp_nr_pages(head) - 1));
 }
 
-unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
-		pgoff_t end, struct pagevec *pvec, pgoff_t *indices);
 unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
 			pgoff_t end, unsigned int nr_pages,
 			struct page **pages);
-- 
cgit v1.2.3


From 51dcbdac28d4dde915f78adf08bb3fac87f516e9 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 7 Dec 2021 14:15:07 -0500
Subject: mm: Convert find_lock_entries() to use a folio_batch

find_lock_entries() already only returned the head page of folios, so
convert it to return a folio_batch instead of a pagevec.  That cascades
through converting truncate_inode_pages_range() to
delete_from_page_cache_batch() and page_cache_delete_batch().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/pagemap.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index d2259a1da51c..6e038811f4c8 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -16,7 +16,7 @@
 #include <linux/hardirq.h> /* for in_interrupt() */
 #include <linux/hugetlb_inline.h>
 
-struct pagevec;
+struct folio_batch;
 
 static inline bool mapping_empty(struct address_space *mapping)
 {
@@ -936,7 +936,7 @@ static inline void __delete_from_page_cache(struct page *page, void *shadow)
 }
 void replace_page_cache_page(struct page *old, struct page *new);
 void delete_from_page_cache_batch(struct address_space *mapping,
-				  struct pagevec *pvec);
+				  struct folio_batch *fbatch);
 int try_to_release_page(struct page *page, gfp_t gfp);
 bool filemap_release_folio(struct folio *folio, gfp_t gfp);
 loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end,
-- 
cgit v1.2.3


From 1613fac9aaf840af76faa747ea428a714af98dbd Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 7 Dec 2021 14:28:49 -0500
Subject: mm: Remove pagevec_remove_exceptionals()

All of its callers now call folio_batch_remove_exceptionals().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/pagevec.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index c3fa616d7ae7..dda8d5868c81 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -27,7 +27,6 @@ struct pagevec {
 
 void __pagevec_release(struct pagevec *pvec);
 void __pagevec_lru_add(struct pagevec *pvec);
-void pagevec_remove_exceptionals(struct pagevec *pvec);
 unsigned pagevec_lookup_range(struct pagevec *pvec,
 			      struct address_space *mapping,
 			      pgoff_t *start, pgoff_t end);
@@ -146,8 +145,5 @@ static inline void folio_batch_release(struct folio_batch *fbatch)
 	pagevec_release((struct pagevec *)fbatch);
 }
 
-static inline void folio_batch_remove_exceptionals(struct folio_batch *fbatch)
-{
-	pagevec_remove_exceptionals((struct pagevec *)fbatch);
-}
+void folio_batch_remove_exceptionals(struct folio_batch *fbatch);
 #endif /* _LINUX_PAGEVEC_H */
-- 
cgit v1.2.3


From 25a8de7f8d970ffa7263bd9d32a08138cd949f17 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 27 Aug 2021 07:21:49 -0400
Subject: XArray: Add xas_advance()

Add a new helper function to help iterate over multi-index entries.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/xarray.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index a91e3d90df8a..d6d5da6ed735 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -1580,6 +1580,24 @@ static inline void xas_set(struct xa_state *xas, unsigned long index)
 	xas->xa_node = XAS_RESTART;
 }
 
+/**
+ * xas_advance() - Skip over sibling entries.
+ * @xas: XArray operation state.
+ * @index: Index of last sibling entry.
+ *
+ * Move the operation state to refer to the last sibling entry.
+ * This is useful for loops that normally want to see sibling
+ * entries but sometimes want to skip them.  Use xas_set() if you
+ * want to move to an index which is not part of this entry.
+ */
+static inline void xas_advance(struct xa_state *xas, unsigned long index)
+{
+	unsigned char shift = xas_is_node(xas) ? xas->xa_node->shift : 0;
+
+	xas->xa_index = index;
+	xas->xa_offset = (index >> shift) & XA_CHUNK_MASK;
+}
+
 /**
  * xas_set_order() - Set up XArray operation state for a multislot entry.
  * @xas: XArray operation state.
-- 
cgit v1.2.3


From 6b24ca4a1a8d4ee3221d6d44ddbb99f542e4bda3 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 27 Jun 2020 22:19:08 -0400
Subject: mm: Use multi-index entries in the page cache

We currently store large folios as 2^N consecutive entries.  While this
consumes rather more memory than necessary, it also turns out to be buggy.
A writeback operation which starts within a tail page of a dirty folio will
not write back the folio as the xarray's dirty bit is only set on the
head index.  With multi-index entries, the dirty bit will be found no
matter where in the folio the operation starts.

This does end up simplifying the page cache slightly, although not as
much as I had hoped.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/pagemap.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 6e038811f4c8..704cb1b4b15d 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1125,16 +1125,6 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac,
 		VM_BUG_ON_PAGE(PageTail(page), page);
 		array[i++] = page;
 		rac->_batch_count += thp_nr_pages(page);
-
-		/*
-		 * The page cache isn't using multi-index entries yet,
-		 * so the xas cursor needs to be manually moved to the
-		 * next index.  This can be removed once the page cache
-		 * is converted.
-		 */
-		if (PageHead(page))
-			xas_set(&xas, rac->_index + rac->_batch_count);
-
 		if (i == array_sz)
 			break;
 	}
-- 
cgit v1.2.3


From 50a483405c420f5f35b8dbb71425459835ae44eb Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 6 Dec 2021 11:35:06 +0900
Subject: kbuild: move headers_check.pl to usr/include/

This script is only used by usr/include/Makefile. Make it local to
the directory.

Update the comment in include/uapi/linux/soundcard.h because
'make headers_check' is no longer functional.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 include/uapi/linux/soundcard.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/soundcard.h b/include/uapi/linux/soundcard.h
index f3b21f989872..ac1318793a86 100644
--- a/include/uapi/linux/soundcard.h
+++ b/include/uapi/linux/soundcard.h
@@ -1051,7 +1051,7 @@ typedef struct mixer_vol_table {
  *	the GPL version of OSS-4.x and build against that version
  *	of the header.
  *
- *	We redefine the extern keyword so that make headers_check
+ *	We redefine the extern keyword so that usr/include/headers_check.pl
  *	does not complain about SEQ_USE_EXTBUF.
  */
 #define SEQ_DECLAREBUF()		SEQ_USE_EXTBUF()
-- 
cgit v1.2.3


From e32cf5dfbe227b355776948b2c9b5691b84d1cbd Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 22 Dec 2021 22:10:09 -0600
Subject: kthread: Generalize pf_io_worker so it can point to struct kthread

The point of using set_child_tid to hold the kthread pointer was that
it already did what is necessary.  There are now restrictions on when
set_child_tid can be initialized and when set_child_tid can be used in
schedule_tail.  Which indicates that continuing to use set_child_tid
to hold the kthread pointer is a bad idea.

Instead of continuing to use the set_child_tid field of task_struct
generalize the pf_io_worker field of task_struct and use it to hold
the kthread pointer.

Rename pf_io_worker (which is a void * pointer) to worker_private so
it can be used to store kthreads struct kthread pointer.  Update the
kthread code to store the kthread pointer in the worker_private field.
Remove the places where set_child_tid had to be dealt with carefully
because kthreads also used it.

Link: https://lkml.kernel.org/r/CAHk-=wgtFAA9SbVYg0gR1tqPMC17-NYcs0GQkaYg1bGhh1uJQQ@mail.gmail.com
Link: https://lkml.kernel.org/r/87a6grvqy8.fsf_-_@email.froward.int.ebiederm.org
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78c351e35fec..52f2fdffa3ab 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -987,8 +987,8 @@ struct task_struct {
 	/* CLONE_CHILD_CLEARTID: */
 	int __user			*clear_child_tid;
 
-	/* PF_IO_WORKER */
-	void				*pf_io_worker;
+	/* PF_KTHREAD | PF_IO_WORKER */
+	void				*worker_private;
 
 	u64				utime;
 	u64				stime;
-- 
cgit v1.2.3


From 3367d1bd738c01b2737eaab7d922bfe5f1a41f38 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sat, 8 Jan 2022 16:31:58 +0100
Subject: power: supply: Provide stubs for charge_behaviour helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When CONFIG_SYSFS is not enabled provide stubs for the helper functions
to not break their callers.

Fixes: 539b9c94ac83 ("power: supply: add helpers for charge_behaviour sysfs")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20220108153158.189489-1-linux@weissschuh.net
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 include/linux/power_supply.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 71f0379c2af8..f6b9ed4630fa 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -553,6 +553,21 @@ ssize_t power_supply_charge_behaviour_show(struct device *dev,
 					   char *buf);
 
 int power_supply_charge_behaviour_parse(unsigned int available_behaviours, const char *buf);
+#else
+static inline
+ssize_t power_supply_charge_behaviour_show(struct device *dev,
+					   unsigned int available_behaviours,
+					   enum power_supply_charge_behaviour behaviour,
+					   char *buf)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int power_supply_charge_behaviour_parse(unsigned int available_behaviours,
+						      const char *buf)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 #endif /* __LINUX_POWER_SUPPLY_H__ */
-- 
cgit v1.2.3


From 2f824d4d197e02275562359a2ae5274177ce500c Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 8 Jan 2022 09:48:31 -0600
Subject: signal: Remove SIGNAL_GROUP_COREDUMP

After the previous cleanups "signal->core_state" is set whenever
SIGNAL_GROUP_COREDUMP is set and "signal->core_state" is tested
whenver the code wants to know if a coredump is in progress.  The
remaining tests of SIGNAL_GROUP_COREDUMP also test to see if
SIGNAL_GROUP_EXIT is set.  Similarly the only place that sets
SIGNAL_GROUP_COREDUMP also sets SIGNAL_GROUP_EXIT.

Which makes SIGNAL_GROUP_COREDUMP unecessary and redundant. So stop
setting SIGNAL_GROUP_COREDUMP, stop testing SIGNAL_GROUP_COREDUMP, and
remove it's definition.

With the setting of SIGNAL_GROUP_COREDUMP gone, coredump_finish no
longer needs to clear SIGNAL_GROUP_COREDUMP out of signal->flags
by setting SIGNAL_GROUP_EXIT.

Link: https://lkml.kernel.org/r/20211213225350.27481-5-ebiederm@xmission.com
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index fa26d2a58413..ecc10e148799 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -256,7 +256,6 @@ struct signal_struct {
 #define SIGNAL_STOP_STOPPED	0x00000001 /* job control stop in effect */
 #define SIGNAL_STOP_CONTINUED	0x00000002 /* SIGCONT since WCONTINUED reap */
 #define SIGNAL_GROUP_EXIT	0x00000004 /* group exit in progress */
-#define SIGNAL_GROUP_COREDUMP	0x00000008 /* coredump in progress */
 /*
  * Pending notifications to parent.
  */
@@ -272,7 +271,7 @@ struct signal_struct {
 static inline void signal_set_stop_flags(struct signal_struct *sig,
 					 unsigned int flags)
 {
-	WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP));
+	WARN_ON(sig->flags & SIGNAL_GROUP_EXIT);
 	sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags;
 }
 
-- 
cgit v1.2.3


From 60700e38fb68e800607ca7a027060d5419fc5798 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 6 Jun 2021 13:47:53 -0500
Subject: signal: Rename group_exit_task group_exec_task

The only remaining user of group_exit_task is exec.  Rename the field
so that it is clear which part of the code uses it.

Update the comment above the definition of group_exec_task to document
how it is currently used.

Link: https://lkml.kernel.org/r/20211213225350.27481-7-ebiederm@xmission.com
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index ecc10e148799..d3248aba5183 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -109,13 +109,9 @@ struct signal_struct {
 
 	/* thread group exit support */
 	int			group_exit_code;
-	/* overloaded:
-	 * - notify group_exit_task when ->count is equal to notify_count
-	 * - everyone except group_exit_task is stopped during signal delivery
-	 *   of fatal signals, group_exit_task processes the signal.
-	 */
+	/* notify group_exec_task when notify_count is less or equal to 0 */
 	int			notify_count;
-	struct task_struct	*group_exit_task;
+	struct task_struct	*group_exec_task;
 
 	/* thread group stop support, overloads group_exit_code too */
 	int			group_stop_count;
@@ -275,11 +271,11 @@ static inline void signal_set_stop_flags(struct signal_struct *sig,
 	sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags;
 }
 
-/* If true, all threads except ->group_exit_task have pending SIGKILL */
+/* If true, all threads except ->group_exec_task have pending SIGKILL */
 static inline int signal_group_exit(const struct signal_struct *sig)
 {
 	return	(sig->flags & SIGNAL_GROUP_EXIT) ||
-		(sig->group_exit_task != NULL);
+		(sig->group_exec_task != NULL);
 }
 
 extern void flush_signals(struct task_struct *);
-- 
cgit v1.2.3


From 49697335e0b441b0553598c1b48ee9ebb053d2f1 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 24 Jun 2021 02:14:30 -0500
Subject: signal: Remove the helper signal_group_exit

This helper is misleading.  It tests for an ongoing exec as well as
the process having received a fatal signal.

Sometimes it is appropriate to treat an on-going exec differently than
a process that is shutting down due to a fatal signal.  In particular
taking the fast path out of exit_signals instead of retargeting
signals is not appropriate during exec, and not changing the the exit
code in do_group_exit during exec.

Removing the helper makes it more obvious what is going on as both
cases must be coded for explicitly.

While removing the helper fix the two cases where I have observed
using signal_group_exit resulted in the wrong result.

In exit_signals only test for SIGNAL_GROUP_EXIT so that signals are
retargetted during an exec.

In do_group_exit use 0 as the exit code during an exec as de_thread
does not set group_exit_code.  As best as I can determine
group_exit_code has been is set to 0 most of the time during
de_thread.  During a thread group stop group_exit_code is set to the
stop signal and when the thread group receives SIGCONT group_exit_code
is reset to 0.

Link: https://lkml.kernel.org/r/20211213225350.27481-8-ebiederm@xmission.com
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index d3248aba5183..b6ecb9fc4cd2 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -271,13 +271,6 @@ static inline void signal_set_stop_flags(struct signal_struct *sig,
 	sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags;
 }
 
-/* If true, all threads except ->group_exec_task have pending SIGKILL */
-static inline int signal_group_exit(const struct signal_struct *sig)
-{
-	return	(sig->flags & SIGNAL_GROUP_EXIT) ||
-		(sig->group_exec_task != NULL);
-}
-
 extern void flush_signals(struct task_struct *);
 extern void ignore_signals(struct task_struct *);
 extern void flush_signal_handlers(struct task_struct *, int force_default);
-- 
cgit v1.2.3


From 2d4bcf886e42f0f4846a3d9bdc3a90d278903a2e Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 8 Jan 2022 11:23:02 -0600
Subject: exit: Remove profile_task_exit & profile_munmap

When I say remove I mean remove.  All profile_task_exit and
profile_munmap do is call a blocking notifier chain.  The helpers
profile_task_register and profile_task_unregister are not called
anywhere in the tree.  Which means this is all dead code.

So remove the dead code and make it easier to read do_exit.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lkml.kernel.org/r/20220103213312.9144-1-ebiederm@xmission.com
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/profile.h | 26 --------------------------
 1 file changed, 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/profile.h b/include/linux/profile.h
index fd18ca96f557..f7eb2b57d890 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -31,11 +31,6 @@ static inline int create_proc_profile(void)
 }
 #endif
 
-enum profile_type {
-	PROFILE_TASK_EXIT,
-	PROFILE_MUNMAP
-};
-
 #ifdef CONFIG_PROFILING
 
 extern int prof_on __read_mostly;
@@ -66,23 +61,14 @@ static inline void profile_hit(int type, void *ip)
 struct task_struct;
 struct mm_struct;
 
-/* task is in do_exit() */
-void profile_task_exit(struct task_struct * task);
-
 /* task is dead, free task struct ? Returns 1 if
  * the task was taken, 0 if the task should be freed.
  */
 int profile_handoff_task(struct task_struct * task);
 
-/* sys_munmap */
-void profile_munmap(unsigned long addr);
-
 int task_handoff_register(struct notifier_block * n);
 int task_handoff_unregister(struct notifier_block * n);
 
-int profile_event_register(enum profile_type, struct notifier_block * n);
-int profile_event_unregister(enum profile_type, struct notifier_block * n);
-
 #else
 
 #define prof_on 0
@@ -117,19 +103,7 @@ static inline int task_handoff_unregister(struct notifier_block * n)
 	return -ENOSYS;
 }
 
-static inline int profile_event_register(enum profile_type t, struct notifier_block * n)
-{
-	return -ENOSYS;
-}
-
-static inline int profile_event_unregister(enum profile_type t, struct notifier_block * n)
-{
-	return -ENOSYS;
-}
-
-#define profile_task_exit(a) do { } while (0)
 #define profile_handoff_task(a) (0)
-#define profile_munmap(a) do { } while (0)
 
 #endif /* CONFIG_PROFILING */
 
-- 
cgit v1.2.3


From 2873cd31a20c25b5e763b35e5fb886f0938c6dd5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 8 Jan 2022 10:03:24 -0600
Subject: exit: Remove profile_handoff_task

All profile_handoff_task does is notify the task_free_notifier chain.
The helpers task_handoff_register and task_handoff_unregister are used
to add and delete entries from that chain and are never called.

So remove the dead code and make it much easier to read and reason
about __put_task_struct.

Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
Link: https://lkml.kernel.org/r/87fspyw6m0.fsf@email.froward.int.ebiederm.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/profile.h | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/profile.h b/include/linux/profile.h
index f7eb2b57d890..11db1ec516e2 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -61,14 +61,6 @@ static inline void profile_hit(int type, void *ip)
 struct task_struct;
 struct mm_struct;
 
-/* task is dead, free task struct ? Returns 1 if
- * the task was taken, 0 if the task should be freed.
- */
-int profile_handoff_task(struct task_struct * task);
-
-int task_handoff_register(struct notifier_block * n);
-int task_handoff_unregister(struct notifier_block * n);
-
 #else
 
 #define prof_on 0
@@ -93,17 +85,6 @@ static inline void profile_hit(int type, void *ip)
 	return;
 }
 
-static inline int task_handoff_register(struct notifier_block * n)
-{
-	return -ENOSYS;
-}
-
-static inline int task_handoff_unregister(struct notifier_block * n)
-{
-	return -ENOSYS;
-}
-
-#define profile_handoff_task(a) (0)
 
 #endif /* CONFIG_PROFILING */
 
-- 
cgit v1.2.3


From 4264178416cd52a55a3eccbefb3973866e060280 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 20 Dec 2021 16:28:53 -0600
Subject: ptrace: Remove unused regs argument from ptrace_report_syscall

Link: https://lkml.kernel.org/r/20220103213312.9144-7-ebiederm@xmission.com
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/tracehook.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 2564b7434b4d..88c007ab5ebc 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -54,8 +54,7 @@ struct linux_binprm;
 /*
  * ptrace report for syscall entry and exit looks identical.
  */
-static inline int ptrace_report_syscall(struct pt_regs *regs,
-					unsigned long message)
+static inline int ptrace_report_syscall(unsigned long message)
 {
 	int ptrace = current->ptrace;
 
@@ -102,7 +101,7 @@ static inline int ptrace_report_syscall(struct pt_regs *regs,
 static inline __must_check int tracehook_report_syscall_entry(
 	struct pt_regs *regs)
 {
-	return ptrace_report_syscall(regs, PTRACE_EVENTMSG_SYSCALL_ENTRY);
+	return ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_ENTRY);
 }
 
 /**
@@ -127,7 +126,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step)
 	if (step)
 		user_single_step_report(regs);
 	else
-		ptrace_report_syscall(regs, PTRACE_EVENTMSG_SYSCALL_EXIT);
+		ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_EXIT);
 }
 
 /**
-- 
cgit v1.2.3


From 40595cdc93edf4110c0f0c0b06f8d82008f23929 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 16 Dec 2021 12:20:13 -0500
Subject: nfs: block notification on fs with its own ->lock

NFSv4.1 supports an optional lock notification feature which notifies
the client when a lock comes available.  (Normally NFSv4 clients just
poll for locks if necessary.)  To make that work, we need to request a
blocking lock from the filesystem.

We turned that off for NFS in commit f657f8eef3ff ("nfs: don't atempt
blocking locks on nfs reexports") [sic] because it actually blocks the
nfsd thread while waiting for the lock.

Thanks to Vasily Averin for pointing out that NFS isn't the only
filesystem with that problem.

Any filesystem that leaves ->lock NULL will use posix_lock_file(), which
does the right thing.  Simplest is just to assume that any filesystem
that defines its own ->lock is not safe to request a blocking lock from.

So, this patch mostly reverts commit f657f8eef3ff ("nfs: don't atempt
blocking locks on nfs reexports") [sic] and commit b840be2f00c0 ("lockd:
don't attempt blocking locks on nfs reexports"), and instead uses a
check of ->lock (Vasily's suggestion) to decide whether to support
blocking lock notifications on a given filesystem.  Also add a little
documentation.

Perhaps someday we could add back an export flag later to allow
filesystems with "good" ->lock methods to support blocking lock
notifications.

Reported-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
[ cel: Description rewritten to address checkpatch nits ]
[ cel: Fixed warning when SUNRPC debugging is disabled ]
[ cel: Fixed NULL check ]
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Vasily Averin <vvs@virtuozzo.com>
---
 include/linux/exportfs.h    | 2 --
 include/linux/lockd/lockd.h | 9 +++++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 3260fe714846..fe848901fcc3 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -221,8 +221,6 @@ struct export_operations {
 #define EXPORT_OP_NOATOMIC_ATTR		(0x10) /* Filesystem cannot supply
 						  atomic attribute updates
 						*/
-#define EXPORT_OP_SYNC_LOCKS		(0x20) /* Filesystem can't do
-						  asychronous blocking locks */
 	unsigned long	flags;
 };
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index c4ae6506b8b3..fcef192e5e45 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -303,10 +303,15 @@ void		  nlmsvc_invalidate_all(void);
 int           nlmsvc_unlock_all_by_sb(struct super_block *sb);
 int           nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
 
+static inline struct file *nlmsvc_file_file(struct nlm_file *file)
+{
+	return file->f_file[O_RDONLY] ?
+	       file->f_file[O_RDONLY] : file->f_file[O_WRONLY];
+}
+
 static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
 {
-	return locks_inode(file->f_file[O_RDONLY] ?
-			   file->f_file[O_RDONLY] : file->f_file[O_WRONLY]);
+	return locks_inode(nlmsvc_file_file(file));
 }
 
 static inline int __nlm_privileged_request4(const struct sockaddr *sap)
-- 
cgit v1.2.3


From 7d30198ee24f2ddcc4fefcd38a9b76bd8ab31360 Mon Sep 17 00:00:00 2001
From: Andrew Zaborowski <andrew.zaborowski@intel.com>
Date: Tue, 9 Nov 2021 16:16:49 +0100
Subject: keys: X.509 public key issuer lookup without AKID

There are non-root X.509 v3 certificates in use out there that contain
no Authority Key Identifier extension (RFC5280 section 4.2.1.1).  For
trust verification purposes the kernel asymmetric key type keeps two
struct asymmetric_key_id instances that the key can be looked up by,
and another two to look up the key's issuer.  The x509 public key type
and the PKCS7 type generate them from the SKID and AKID extensions in
the certificate.  In effect current code has no way to look up the
issuer certificate for verification without the AKID.

To remedy this, add a third asymmetric_key_id blob to the arrays in
both asymmetric_key_id's (for certficate subject) and in the
public_keys_signature's auth_ids (for issuer lookup), using just raw
subject and issuer DNs from the certificate.  Adapt
asymmetric_key_ids() and its callers to use the third ID for lookups
when none of the other two are available.  Attempt to keep the logic
intact when they are, to minimise behaviour changes.  Adapt the
restrict functions' NULL-checks to include that ID too.  Do not modify
the lookup logic in pkcs7_verify.c, the AKID extensions are still
required there.

Internally use a new "dn:" prefix to the search specifier string
generated for the key lookup in find_asymmetric_key().  This tells
asymmetric_key_match_preparse to only match the data against the raw
DN in the third ID and shouldn't conflict with search specifiers
already in use.

In effect implement what (2) in the struct asymmetric_key_id comment
(include/keys/asymmetric-type.h) is probably talking about already, so
do not modify that comment.  It is also how "openssl verify" looks up
issuer certificates without the AKID available.  Lookups by the raw
DN are unambiguous only provided that the CAs respect the condition in
RFC5280 4.2.1.1 that the AKID may only be omitted if the CA uses
a single signing key.

The following is an example of two things that this change enables.
A self-signed ceritficate is generated following the example from
https://letsencrypt.org/docs/certificates-for-localhost/, and can be
looked up by an identifier and verified against itself by linking to a
restricted keyring -- both things not possible before due to the missing
AKID extension:

$ openssl req -x509 -out localhost.crt -outform DER -keyout localhost.key \
  -newkey rsa:2048 -nodes -sha256 \
  -subj '/CN=localhost' -extensions EXT -config <( \
   echo -e "[dn]\nCN=localhost\n[req]\ndistinguished_name = dn\n[EXT]\n" \
          "subjectAltName=DNS:localhost\nkeyUsage=digitalSignature\n" \
	  "extendedKeyUsage=serverAuth")
$ keyring=`keyctl newring test @u`
$ trusted=`keyctl padd asymmetric trusted $keyring < localhost.crt`; \
  echo $trusted
39726322
$ keyctl search $keyring asymmetric dn:3112301006035504030c096c6f63616c686f7374
39726322
$ keyctl restrict_keyring $keyring asymmetric key_or_keyring:$trusted
$ keyctl padd asymmetric verified $keyring < localhost.crt

Signed-off-by: Andrew Zaborowski <andrew.zaborowski@intel.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Acked-by: Jarkko Sakkinen <jarkko@kernel.org>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 include/crypto/public_key.h    | 2 +-
 include/keys/asymmetric-type.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h
index f603325c0c30..68f7aa2a7e55 100644
--- a/include/crypto/public_key.h
+++ b/include/crypto/public_key.h
@@ -36,7 +36,7 @@ extern void public_key_free(struct public_key *key);
  * Public key cryptography signature data
  */
 struct public_key_signature {
-	struct asymmetric_key_id *auth_ids[2];
+	struct asymmetric_key_id *auth_ids[3];
 	u8 *s;			/* Signature */
 	u8 *digest;
 	u32 s_size;		/* Number of bytes in signature */
diff --git a/include/keys/asymmetric-type.h b/include/keys/asymmetric-type.h
index c432fdb8547f..6c5d4963e15b 100644
--- a/include/keys/asymmetric-type.h
+++ b/include/keys/asymmetric-type.h
@@ -53,7 +53,7 @@ struct asymmetric_key_id {
 };
 
 struct asymmetric_key_ids {
-	void		*id[2];
+	void		*id[3];
 };
 
 extern bool asymmetric_key_id_same(const struct asymmetric_key_id *kid1,
@@ -81,6 +81,7 @@ const struct public_key *asymmetric_key_public_key(const struct key *key)
 extern struct key *find_asymmetric_key(struct key *keyring,
 				       const struct asymmetric_key_id *id_0,
 				       const struct asymmetric_key_id *id_1,
+				       const struct asymmetric_key_id *id_2,
 				       bool partial);
 
 /*
-- 
cgit v1.2.3


From 0aa698787aa2a9e8840987e54ba2982559de6404 Mon Sep 17 00:00:00 2001
From: axelj <axelj@axis.com>
Date: Mon, 13 Dec 2021 08:09:25 +0100
Subject: tpm: Add Upgrade/Reduced mode support for TPM2 modules

If something went wrong during the TPM firmware upgrade, like power
failure or the firmware image file get corrupted, the TPM might end
up in Upgrade or Failure mode upon the next start. The state is
persistent between the TPM power cycle/restart.

According to TPM specification:
 * If the TPM is in Upgrade mode, it will answer with TPM2_RC_UPGRADE
   to all commands except TPM2_FieldUpgradeData(). It may also accept
   other commands if it is able to complete them using the previously
   installed firmware.
 * If the TPM is in Failure mode, it will allow performing TPM
   initialization but will not provide any crypto operations.
   Will happily respond to Field Upgrade calls.

Change the behavior of the tpm2_auto_startup(), so it detects the active
running mode of the TPM by adding the following checks.  If
tpm2_do_selftest() call returns TPM2_RC_UPGRADE, the TPM is in Upgrade
mode.
If the TPM is in Failure mode, it will successfully respond to both
tpm2_do_selftest() and tpm2_startup() calls. Although, will fail to
answer to tpm2_get_cc_attrs_tbl(). Use this fact to conclude that TPM is
in Failure mode.

If detected that the TPM is in the Upgrade or Failure mode, the function
sets TPM_CHIP_FLAG_FIRMWARE_UPGRADE_MODE flag.

The TPM_CHIP_FLAG_FIRMWARE_UPGRADE_MODE flag is used later during driver
initialization/deinitialization to disable functionality which makes no
sense or will fail in the current TPM state. Following functionality is
affected:
 * Do not register TPM as a hwrng
 * Do not register sysfs entries which provide information impossible to
   obtain in limited mode
 * Do not register resource managed character device

Signed-off-by: axelj <axelj@axis.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 include/linux/tpm.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 12d827734686..dfeb25a0362d 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -207,6 +207,7 @@ enum tpm2_return_codes {
 	TPM2_RC_INITIALIZE	= 0x0100, /* RC_VER1 */
 	TPM2_RC_FAILURE		= 0x0101,
 	TPM2_RC_DISABLED	= 0x0120,
+	TPM2_RC_UPGRADE		= 0x012D,
 	TPM2_RC_COMMAND_CODE    = 0x0143,
 	TPM2_RC_TESTING		= 0x090A, /* RC_WARN */
 	TPM2_RC_REFERENCE_H0	= 0x0910,
@@ -278,6 +279,7 @@ enum tpm_chip_flags {
 	TPM_CHIP_FLAG_HAVE_TIMEOUTS	= BIT(4),
 	TPM_CHIP_FLAG_ALWAYS_POWERED	= BIT(5),
 	TPM_CHIP_FLAG_FIRMWARE_POWER_MANAGED	= BIT(6),
+	TPM_CHIP_FLAG_FIRMWARE_UPGRADE	= BIT(7),
 };
 
 #define to_tpm_chip(d) container_of(d, struct tpm_chip, dev)
@@ -399,6 +401,14 @@ static inline void tpm_buf_append_u32(struct tpm_buf *buf, const u32 value)
 	tpm_buf_append(buf, (u8 *) &value2, 4);
 }
 
+/*
+ * Check if TPM device is in the firmware upgrade mode.
+ */
+static inline bool tpm_is_firmware_upgrade(struct tpm_chip *chip)
+{
+	return chip->flags & TPM_CHIP_FLAG_FIRMWARE_UPGRADE;
+}
+
 static inline u32 tpm2_rc_value(u32 rc)
 {
 	return (rc & BIT(7)) ? rc & 0xff : rc;
-- 
cgit v1.2.3


From 292c33c95defd0b814fec1fc8cd60d16556cf7b8 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Fri, 7 Jan 2022 08:52:28 +0800
Subject: block: fix old-style declaration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the 'inline' keyword to the front of 'void'.

Remove a warning found by clang(make W=1 LLVM=1)
./include/linux/blk-mq.h:259:1: warning: ‘inline’ is not at beginning of
declaration

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Link: https://lore.kernel.org/r/20220107005228.103927-1-yang.lee@linux.alibaba.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index f40a05ecca4a..d319ffa59354 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -256,7 +256,7 @@ static inline unsigned short req_get_ioprio(struct request *req)
  * @rq: The request to move
  * @prev: The request preceding @rq in @src (NULL if @rq is the head)
  */
-static void inline rq_list_move(struct request **src, struct request **dst,
+static inline void rq_list_move(struct request **src, struct request **dst,
 				struct request *rq, struct request *prev)
 {
 	if (prev)
-- 
cgit v1.2.3


From 0ea9fc15b1d7d6636d429e74ffe3f86bf2f2f7d6 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 23 Nov 2021 17:05:07 +0100
Subject: fs/locks: fix fcntl_getlk64/fcntl_setlk64 stub prototypes

My patch to rework oabi fcntl64() introduced a harmless
sparse warning when file locking is disabled:

   arch/arm/kernel/sys_oabi-compat.c:251:51: sparse: sparse: incorrect type in argument 3 (different address spaces) @@     expected struct flock64 [noderef] __user *user @@     got struct flock64 * @@
   arch/arm/kernel/sys_oabi-compat.c:251:51: sparse:     expected struct flock64 [noderef] __user *user
   arch/arm/kernel/sys_oabi-compat.c:251:51: sparse:     got struct flock64 *
   arch/arm/kernel/sys_oabi-compat.c:265:55: sparse: sparse: incorrect type in argument 4 (different address spaces) @@     expected struct flock64 [noderef] __user *user @@     got struct flock64 * @@
   arch/arm/kernel/sys_oabi-compat.c:265:55: sparse:     expected struct flock64 [noderef] __user *user
   arch/arm/kernel/sys_oabi-compat.c:265:55: sparse:     got struct flock64 *

When file locking is enabled, everything works correctly and the
right data gets passed, but the stub declarations in linux/fs.h
did not get modified when the calling conventions changed in an
earlier patch.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: 7e2d8c29ecdd ("ARM: 9111/1: oabi-compat: rework fcntl64() emulation")
Fixes: a75d30c77207 ("fs/locks: pass kernel struct flock to fcntl_getlk/setlk")
Cc: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/fs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index bbf812ce89a8..5122d13775c2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1220,13 +1220,13 @@ static inline int fcntl_setlk(unsigned int fd, struct file *file,
 
 #if BITS_PER_LONG == 32
 static inline int fcntl_getlk64(struct file *file, unsigned int cmd,
-				struct flock64 __user *user)
+				struct flock64 *user)
 {
 	return -EINVAL;
 }
 
 static inline int fcntl_setlk64(unsigned int fd, struct file *file,
-				unsigned int cmd, struct flock64 __user *user)
+				unsigned int cmd, struct flock64 *user)
 {
 	return -EACCES;
 }
-- 
cgit v1.2.3


From 613a0c67d12f33dcbeec2836f5fe60d05b4c18c0 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Sun, 26 Dec 2021 01:12:41 +0800
Subject: netfilter: conntrack: Use max() instead of doing it manually

Fix following coccicheck warning:

./include/net/netfilter/nf_conntrack.h:282:16-17: WARNING opportunity
for max().

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 871489df63c6..a4a14f3a5e38 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -279,7 +279,7 @@ static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
 {
 	s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
 
-	return timeout > 0 ? timeout : 0;
+	return max(timeout, 0);
 }
 
 static inline bool nf_ct_is_expired(const struct nf_conn *ct)
-- 
cgit v1.2.3


From 719774377622bc4025d2a74f551b5dc2158c6c30 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 7 Jan 2022 05:03:22 +0100
Subject: netfilter: conntrack: convert to refcount_t api

Convert nf_conn reference counting from atomic_t to refcount_t based api.
refcount_t api provides more runtime sanity checks and will warn on
certain constructs, e.g. refcount_inc() on a zero reference count, which
usually indicates use-after-free.

For this reason template allocation is changed to init the refcount to
1, the subsequenct add operations are removed.

Likewise, init_conntrack() is changed to set the initial refcount to 1
instead refcount_inc().

This is safe because the new entry is not (yet) visible to other cpus.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_common.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 700ea077ce2d..a03f7a80b9ab 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -2,7 +2,7 @@
 #ifndef _NF_CONNTRACK_COMMON_H
 #define _NF_CONNTRACK_COMMON_H
 
-#include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <uapi/linux/netfilter/nf_conntrack_common.h>
 
 struct ip_conntrack_stat {
@@ -25,19 +25,19 @@ struct ip_conntrack_stat {
 #define NFCT_PTRMASK	~(NFCT_INFOMASK)
 
 struct nf_conntrack {
-	atomic_t use;
+	refcount_t use;
 };
 
 void nf_conntrack_destroy(struct nf_conntrack *nfct);
 static inline void nf_conntrack_put(struct nf_conntrack *nfct)
 {
-	if (nfct && atomic_dec_and_test(&nfct->use))
+	if (nfct && refcount_dec_and_test(&nfct->use))
 		nf_conntrack_destroy(nfct);
 }
 static inline void nf_conntrack_get(struct nf_conntrack *nfct)
 {
 	if (nfct)
-		atomic_inc(&nfct->use);
+		refcount_inc(&nfct->use);
 }
 
 #endif /* _NF_CONNTRACK_COMMON_H */
-- 
cgit v1.2.3


From 3fce16493dc1aa2c9af3d7e7bd360dfe203a3e6a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 7 Jan 2022 05:03:23 +0100
Subject: netfilter: core: move ip_ct_attach indirection to struct nf_ct_hook

ip_ct_attach predates struct nf_ct_hook, we can place it there and
remove the exported symbol.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 3fda1a508733..e0e3f3355ab1 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -440,7 +440,6 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 #include <linux/netfilter/nf_conntrack_zones_common.h>
 
-extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
 void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
 struct nf_conntrack_tuple;
 bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
@@ -463,6 +462,7 @@ struct nf_ct_hook {
 	void (*destroy)(struct nf_conntrack *);
 	bool (*get_tuple_skb)(struct nf_conntrack_tuple *,
 			      const struct sk_buff *);
+	void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
 };
 extern struct nf_ct_hook __rcu *nf_ct_hook;
 
-- 
cgit v1.2.3


From 285c8a7a58158cb1805c97ff03875df2ba2ea1fe Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 7 Jan 2022 05:03:24 +0100
Subject: netfilter: make function op structures const

No functional changes, these structures should be const.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index e0e3f3355ab1..15e71bfff726 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -381,13 +381,13 @@ struct nf_nat_hook {
 				  enum ip_conntrack_dir dir);
 };
 
-extern struct nf_nat_hook __rcu *nf_nat_hook;
+extern const struct nf_nat_hook __rcu *nf_nat_hook;
 
 static inline void
 nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 {
 #if IS_ENABLED(CONFIG_NF_NAT)
-	struct nf_nat_hook *nat_hook;
+	const struct nf_nat_hook *nat_hook;
 
 	rcu_read_lock();
 	nat_hook = rcu_dereference(nf_nat_hook);
@@ -464,7 +464,7 @@ struct nf_ct_hook {
 			      const struct sk_buff *);
 	void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
 };
-extern struct nf_ct_hook __rcu *nf_ct_hook;
+extern const struct nf_ct_hook __rcu *nf_ct_hook;
 
 struct nlattr;
 
@@ -479,7 +479,7 @@ struct nfnl_ct_hook {
 	void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct,
 			   enum ip_conntrack_info ctinfo, s32 off);
 };
-extern struct nfnl_ct_hook __rcu *nfnl_ct_hook;
+extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook;
 
 /**
  * nf_skb_duplicated - TEE target has sent a packet
-- 
cgit v1.2.3


From 6ae7989c9af0d98ab64196f4f4c6f6499454bd23 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 7 Jan 2022 05:03:25 +0100
Subject: netfilter: conntrack: avoid useless indirection during conntrack
 destruction

nf_ct_put() results in a usesless indirection:

nf_ct_put -> nf_conntrack_put -> nf_conntrack_destroy -> rcu readlock +
indirect call of ct_hooks->destroy().

There are two _put helpers:
nf_ct_put and nf_conntrack_put.  The latter is what should be used in
code that MUST NOT cause a linker dependency on the conntrack module
(e.g. calls from core network stack).

Everyone else should call nf_ct_put() instead.

A followup patch will convert a few nf_conntrack_put() calls to
nf_ct_put(), in particular from modules that already have a conntrack
dependency such as act_ct or even nf_conntrack itself.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_common.h | 2 ++
 include/net/netfilter/nf_conntrack.h          | 8 ++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index a03f7a80b9ab..2770db2fa080 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -29,6 +29,8 @@ struct nf_conntrack {
 };
 
 void nf_conntrack_destroy(struct nf_conntrack *nfct);
+
+/* like nf_ct_put, but without module dependency on nf_conntrack */
 static inline void nf_conntrack_put(struct nf_conntrack *nfct)
 {
 	if (nfct && refcount_dec_and_test(&nfct->use))
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index a4a14f3a5e38..8731d5bcb47d 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -76,6 +76,8 @@ struct nf_conn {
 	 * Hint, SKB address this struct and refcnt via skb->_nfct and
 	 * helpers nf_conntrack_get() and nf_conntrack_put().
 	 * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt,
+	 * except that the latter uses internal indirection and does not
+	 * result in a conntrack module dependency.
 	 * beware nf_ct_get() is different and don't inc refcnt.
 	 */
 	struct nf_conntrack ct_general;
@@ -170,11 +172,13 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
 	return (struct nf_conn *)(nfct & NFCT_PTRMASK);
 }
 
+void nf_ct_destroy(struct nf_conntrack *nfct);
+
 /* decrement reference count on a conntrack */
 static inline void nf_ct_put(struct nf_conn *ct)
 {
-	WARN_ON(!ct);
-	nf_conntrack_put(&ct->ct_general);
+	if (ct && refcount_dec_and_test(&ct->ct_general.use))
+		nf_ct_destroy(&ct->ct_general);
 }
 
 /* Protocol module loading */
-- 
cgit v1.2.3


From 6316136ec6e3dd1c302f7e7289a9ee46ecc610ae Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 7 Jan 2022 15:46:16 +0100
Subject: netfilter: egress: avoid a lockdep splat

include/linux/netfilter_netdev.h:97 suspicious rcu_dereference_check() usage!
2 locks held by sd-resolve/1100:
 0: ..(rcu_read_lock_bh){1:3}, at: ip_finish_output2
 1: ..(rcu_read_lock_bh){1:3}, at: __dev_queue_xmit
 __dev_queue_xmit+0 ..

The helper has two callers, one uses rcu_read_lock, the other
rcu_read_lock_bh().  Annotate the dereference to reflect this.

Fixes: 42df6e1d221dd ("netfilter: Introduce egress hook")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_netdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h
index b71b57a83bb4..b4dd96e4dc8d 100644
--- a/include/linux/netfilter_netdev.h
+++ b/include/linux/netfilter_netdev.h
@@ -94,7 +94,7 @@ static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc,
 		return skb;
 #endif
 
-	e = rcu_dereference(dev->nf_hooks_egress);
+	e = rcu_dereference_check(dev->nf_hooks_egress, rcu_read_lock_bh_held());
 	if (!e)
 		return skb;
 
-- 
cgit v1.2.3


From 2c865a8a28a10e9800a3dd07ca339d24563e3d65 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 9 Jan 2022 17:11:19 +0100
Subject: netfilter: nf_tables: add rule blob layout

This patch adds a blob layout per chain to represent the ruleset in the
packet datapath.

	size (unsigned long)
	struct nft_rule_dp
	  struct nft_expr
	  ...
        struct nft_rule_dp
          struct nft_expr
          ...
        struct nft_rule_dp (is_last=1)

The new structure nft_rule_dp represents the rule in a more compact way
(smaller memory footprint) compared to the control-plane nft_rule
structure.

The ruleset blob is a read-only data structure. The first field contains
the blob size, then the rules containing expressions. There is a trailing
rule which is used by the tracing infrastructure which is equivalent to
the NULL rule marker in the previous representation. The blob size field
does not include the size of this trailing rule marker.

The ruleset blob is generated from the commit path.

This patch reuses the infrastructure available since 0cbc06b3faba
("netfilter: nf_tables: remove synchronize_rcu in commit phase") to
build the array of rules per chain.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index a0d9e0b47ab8..5a046b01bdab 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -974,6 +974,20 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext,
 
 #define NFT_CHAIN_POLICY_UNSET		U8_MAX
 
+struct nft_rule_dp {
+	u64				is_last:1,
+					dlen:12,
+					handle:42;	/* for tracing */
+	unsigned char			data[]
+		__attribute__((aligned(__alignof__(struct nft_expr))));
+};
+
+struct nft_rule_blob {
+	unsigned long			size;
+	unsigned char			data[]
+		__attribute__((aligned(__alignof__(struct nft_rule_dp))));
+};
+
 /**
  *	struct nft_chain - nf_tables chain
  *
@@ -987,8 +1001,8 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext,
  *	@name: name of the chain
  */
 struct nft_chain {
-	struct nft_rule			*__rcu *rules_gen_0;
-	struct nft_rule			*__rcu *rules_gen_1;
+	struct nft_rule_blob		__rcu *blob_gen_0;
+	struct nft_rule_blob		__rcu *blob_gen_1;
 	struct list_head		rules;
 	struct list_head		list;
 	struct rhlist_head		rhlhead;
@@ -1003,7 +1017,7 @@ struct nft_chain {
 	u8				*udata;
 
 	/* Only used during control plane commit phase: */
-	struct nft_rule			**rules_next;
+	struct nft_rule_blob		*blob_next;
 };
 
 int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
@@ -1321,7 +1335,7 @@ struct nft_traceinfo {
 	const struct nft_pktinfo	*pkt;
 	const struct nft_base_chain	*basechain;
 	const struct nft_chain		*chain;
-	const struct nft_rule		*rule;
+	const struct nft_rule_dp	*rule;
 	const struct nft_verdict	*verdict;
 	enum nft_trace_types		type;
 	bool				packet_dumped;
-- 
cgit v1.2.3


From 642c8eff5c6099dfde386ca3906fa55dc98f9ade Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 9 Jan 2022 17:11:20 +0100
Subject: netfilter: nf_tables: add NFT_REG32_NUM

Add a definition including the maximum number of 32-bits registers that
are used a scratchpad memory area to store data.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 5a046b01bdab..515e5db97e01 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -105,6 +105,8 @@ struct nft_data {
 	};
 } __attribute__((aligned(__alignof__(u64))));
 
+#define NFT_REG32_NUM		20
+
 /**
  *	struct nft_regs - nf_tables register set
  *
@@ -115,7 +117,7 @@ struct nft_data {
  */
 struct nft_regs {
 	union {
-		u32			data[20];
+		u32			data[NFT_REG32_NUM];
 		struct nft_verdict	verdict;
 	};
 };
-- 
cgit v1.2.3


From 12e4ecfa244be2f117ef5304d2d866b65e70bff3 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 9 Jan 2022 17:11:21 +0100
Subject: netfilter: nf_tables: add register tracking infrastructure

This patch adds new infrastructure to skip redundant selector store
operations on the same register to achieve a performance boost from
the packet path.

This is particularly noticeable in pure linear rulesets but it also
helps in rulesets which are already heaving relying in maps to avoid
ruleset linear inspection.

The idea is to keep data of the most recurrent store operations on
register to reuse them with cmp and lookup expressions.

This infrastructure allows for dynamic ruleset updates since the ruleset
blob reduction happens from the kernel.

Userspace still needs to be updated to maximize register utilization to
cooperate to improve register data reuse / reduce number of store on
register operations.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 515e5db97e01..1c37ce61daea 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -122,6 +122,16 @@ struct nft_regs {
 	};
 };
 
+struct nft_regs_track {
+	struct {
+		const struct nft_expr		*selector;
+		const struct nft_expr		*bitwise;
+	} regs[NFT_REG32_NUM];
+
+	const struct nft_expr			*cur;
+	const struct nft_expr			*last;
+};
+
 /* Store/load an u8, u16 or u64 integer to/from the u32 data register.
  *
  * Note, when using concatenations, register allocation happens at 32-bit
@@ -886,6 +896,8 @@ struct nft_expr_ops {
 	int				(*validate)(const struct nft_ctx *ctx,
 						    const struct nft_expr *expr,
 						    const struct nft_data **data);
+	bool				(*reduce)(struct nft_regs_track *track,
+						  const struct nft_expr *expr);
 	bool				(*gc)(struct net *net,
 					      const struct nft_expr *expr);
 	int				(*offload)(struct nft_offload_ctx *ctx,
-- 
cgit v1.2.3


From be5650f8f47e8cffbbbcad08b71103685e971f20 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 9 Jan 2022 17:11:24 +0100
Subject: netfilter: nft_bitwise: track register operations

Check if the destination register already contains the data that this
bitwise expression performs. This allows to skip this redundant
operation.

If the destination contains a different bitwise operation, cancel the
register tracking information. If the destination contains no bitwise
operation, update the register tracking information.

Update the payload and meta expression to check if this bitwise
operation has been already performed on the register. Hence, both the
payload/meta and the bitwise expressions are reduced.

There is also a special case: If source register != destination register
and source register is not updated by a previous bitwise operation, then
transfer selector from the source register to the destination register.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 1c37ce61daea..eaf55da9a205 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -358,6 +358,8 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src);
 void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr);
 int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
 		  const struct nft_expr *expr);
+bool nft_expr_reduce_bitwise(struct nft_regs_track *track,
+			     const struct nft_expr *expr);
 
 struct nft_set_ext;
 
-- 
cgit v1.2.3


From 6f022c2ddbcefaee79502ce5386dfe351d457070 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Thu, 6 Jan 2022 17:38:04 +0200
Subject: net: openvswitch: Fix ct_state nat flags for conns arriving from tc

Netfilter conntrack maintains NAT flags per connection indicating
whether NAT was configured for the connection. Openvswitch maintains
NAT flags on the per packet flow key ct_state field, indicating
whether NAT was actually executed on the packet.

When a packet misses from tc to ovs the conntrack NAT flags are set.
However, NAT was not necessarily executed on the packet because the
connection's state might still be in NEW state. As such, openvswitch
wrongly assumes that NAT was executed and sets an incorrect flow key
NAT flags.

Fix this, by flagging to openvswitch which NAT was actually done in
act_ct via tc_skb_ext and tc_skb_cb to the openvswitch module, so
the packet flow key NAT flags will be correctly set.

Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct")
Signed-off-by: Paul Blakey <paulb@nvidia.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20220106153804.26451-1-paulb@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h  | 4 +++-
 include/net/pkt_sched.h | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4507d77d6941..60ab0c2fe567 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -287,7 +287,9 @@ struct tc_skb_ext {
 	__u32 chain;
 	__u16 mru;
 	__u16 zone;
-	bool post_ct;
+	u8 post_ct:1;
+	u8 post_ct_snat:1;
+	u8 post_ct_dnat:1;
 };
 #endif
 
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 9e71691c491b..9e7b21c0b3a6 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -197,7 +197,9 @@ struct tc_skb_cb {
 	struct qdisc_skb_cb qdisc_cb;
 
 	u16 mru;
-	bool post_ct;
+	u8 post_ct:1;
+	u8 post_ct_snat:1;
+	u8 post_ct_dnat:1;
 	u16 zone; /* Only valid if post_ct = true */
 };
 
-- 
cgit v1.2.3


From c504e5c2f9648a1e5c2be01e8c3f59d394192bd3 Mon Sep 17 00:00:00 2001
From: Menglong Dong <imagedong@tencent.com>
Date: Sun, 9 Jan 2022 14:36:26 +0800
Subject: net: skb: introduce kfree_skb_reason()

Introduce the interface kfree_skb_reason(), which is able to pass
the reason why the skb is dropped to 'kfree_skb' tracepoint.

Add the 'reason' field to 'trace_kfree_skb', therefor user can get
more detail information about abnormal skb with 'drop_monitor' or
eBPF.

All drop reasons are defined in the enum 'skb_drop_reason', and
they will be print as string in 'kfree_skb' tracepoint in format
of 'reason: XXX'.

( Maybe the reasons should be defined in a uapi header file, so that
user space can use them? )

Signed-off-by: Menglong Dong <imagedong@tencent.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h     | 23 ++++++++++++++++++++++-
 include/trace/events/skb.h | 36 +++++++++++++++++++++++++++++-------
 2 files changed, 51 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 642acb0d1646..ef0870abc791 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -305,6 +305,17 @@ struct sk_buff_head {
 
 struct sk_buff;
 
+/* The reason of skb drop, which is used in kfree_skb_reason().
+ * en...maybe they should be splited by group?
+ *
+ * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is
+ * used to translate the reason to string.
+ */
+enum skb_drop_reason {
+	SKB_DROP_REASON_NOT_SPECIFIED,
+	SKB_DROP_REASON_MAX,
+};
+
 /* To allow 64K frame to be packed as single skb without frag_list we
  * require 64K/PAGE_SIZE pages plus 1 additional page to allow for
  * buffers which do not start on a page boundary.
@@ -1085,8 +1096,18 @@ static inline bool skb_unref(struct sk_buff *skb)
 	return true;
 }
 
+void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
+
+/**
+ *	kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason
+ *	@skb: buffer to free
+ */
+static inline void kfree_skb(struct sk_buff *skb)
+{
+	kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED);
+}
+
 void skb_release_head_state(struct sk_buff *skb);
-void kfree_skb(struct sk_buff *skb);
 void kfree_skb_list(struct sk_buff *segs);
 void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt);
 void skb_tx_error(struct sk_buff *skb);
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 9e92f22eb086..294c61bbe44b 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -9,29 +9,51 @@
 #include <linux/netdevice.h>
 #include <linux/tracepoint.h>
 
+#define TRACE_SKB_DROP_REASON					\
+	EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED)	\
+	EMe(SKB_DROP_REASON_MAX, MAX)
+
+#undef EM
+#undef EMe
+
+#define EM(a, b)	TRACE_DEFINE_ENUM(a);
+#define EMe(a, b)	TRACE_DEFINE_ENUM(a);
+
+TRACE_SKB_DROP_REASON
+
+#undef EM
+#undef EMe
+#define EM(a, b)	{ a, #b },
+#define EMe(a, b)	{ a, #b }
+
 /*
  * Tracepoint for free an sk_buff:
  */
 TRACE_EVENT(kfree_skb,
 
-	TP_PROTO(struct sk_buff *skb, void *location),
+	TP_PROTO(struct sk_buff *skb, void *location,
+		 enum skb_drop_reason reason),
 
-	TP_ARGS(skb, location),
+	TP_ARGS(skb, location, reason),
 
 	TP_STRUCT__entry(
-		__field(	void *,		skbaddr		)
-		__field(	void *,		location	)
-		__field(	unsigned short,	protocol	)
+		__field(void *,		skbaddr)
+		__field(void *,		location)
+		__field(unsigned short,	protocol)
+		__field(enum skb_drop_reason,	reason)
 	),
 
 	TP_fast_assign(
 		__entry->skbaddr = skb;
 		__entry->location = location;
 		__entry->protocol = ntohs(skb->protocol);
+		__entry->reason = reason;
 	),
 
-	TP_printk("skbaddr=%p protocol=%u location=%p",
-		__entry->skbaddr, __entry->protocol, __entry->location)
+	TP_printk("skbaddr=%p protocol=%u location=%p reason: %s",
+		  __entry->skbaddr, __entry->protocol, __entry->location,
+		  __print_symbolic(__entry->reason,
+				   TRACE_SKB_DROP_REASON))
 );
 
 TRACE_EVENT(consume_skb,
-- 
cgit v1.2.3


From 85125597419aec3aa7b8f3b8713e415f997796f2 Mon Sep 17 00:00:00 2001
From: Menglong Dong <imagedong@tencent.com>
Date: Sun, 9 Jan 2022 14:36:27 +0800
Subject: net: skb: use kfree_skb_reason() in tcp_v4_rcv()

Replace kfree_skb() with kfree_skb_reason() in tcp_v4_rcv(). Following
drop reasons are added:

SKB_DROP_REASON_NO_SOCKET
SKB_DROP_REASON_PKT_TOO_SMALL
SKB_DROP_REASON_TCP_CSUM
SKB_DROP_REASON_TCP_FILTER

After this patch, 'kfree_skb' event will print message like this:

$           TASK-PID     CPU#  |||||  TIMESTAMP  FUNCTION
$              | |         |   |||||     |         |
          <idle>-0       [000] ..s1.    36.113438: kfree_skb: skbaddr=(____ptrval____) protocol=2048 location=(____ptrval____) reason: NO_SOCKET

The reason of skb drop is printed too.

Signed-off-by: Menglong Dong <imagedong@tencent.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h     | 4 ++++
 include/trace/events/skb.h | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ef0870abc791..c9c97b0d0fe9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -313,6 +313,10 @@ struct sk_buff;
  */
 enum skb_drop_reason {
 	SKB_DROP_REASON_NOT_SPECIFIED,
+	SKB_DROP_REASON_NO_SOCKET,
+	SKB_DROP_REASON_PKT_TOO_SMALL,
+	SKB_DROP_REASON_TCP_CSUM,
+	SKB_DROP_REASON_TCP_FILTER,
 	SKB_DROP_REASON_MAX,
 };
 
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 294c61bbe44b..faa7d068a7bc 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -11,6 +11,10 @@
 
 #define TRACE_SKB_DROP_REASON					\
 	EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED)	\
+	EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET)		\
+	EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL)	\
+	EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM)			\
+	EM(SKB_DROP_REASON_TCP_FILTER, TCP_FILTER)		\
 	EMe(SKB_DROP_REASON_MAX, MAX)
 
 #undef EM
-- 
cgit v1.2.3


From 1c7fab70df085d866a3765955f397ca2b4025b15 Mon Sep 17 00:00:00 2001
From: Menglong Dong <imagedong@tencent.com>
Date: Sun, 9 Jan 2022 14:36:28 +0800
Subject: net: skb: use kfree_skb_reason() in __udp4_lib_rcv()

Replace kfree_skb() with kfree_skb_reason() in __udp4_lib_rcv.
New drop reason 'SKB_DROP_REASON_UDP_CSUM' is added for udp csum
error.

Signed-off-by: Menglong Dong <imagedong@tencent.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h     | 1 +
 include/trace/events/skb.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c9c97b0d0fe9..af64c7de9b53 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -317,6 +317,7 @@ enum skb_drop_reason {
 	SKB_DROP_REASON_PKT_TOO_SMALL,
 	SKB_DROP_REASON_TCP_CSUM,
 	SKB_DROP_REASON_TCP_FILTER,
+	SKB_DROP_REASON_UDP_CSUM,
 	SKB_DROP_REASON_MAX,
 };
 
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index faa7d068a7bc..3e042ca2cedb 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -15,6 +15,7 @@
 	EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL)	\
 	EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM)			\
 	EM(SKB_DROP_REASON_TCP_FILTER, TCP_FILTER)		\
+	EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM)			\
 	EMe(SKB_DROP_REASON_MAX, MAX)
 
 #undef EM
-- 
cgit v1.2.3


From 1c582c6dc4244d88f702dc3afd5b47225332edf6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Wed, 3 Nov 2021 20:38:21 +0100
Subject: 9p/trans_fd: split into dedicated module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows these transports only to be used when needed.

Link: https://lkml.kernel.org/r/20211103193823.111007-3-linux@weissschuh.net
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
[Dominique: Kconfig NET_9P_FD: -depends VIRTIO, +default NET_9P]
Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
---
 include/net/9p/9p.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 9c6ec78e47a5..24a509f559ee 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -551,6 +551,4 @@ struct p9_fcall {
 int p9_errstr2errno(char *errstr, int len);
 
 int p9_error_init(void);
-int p9_trans_fd_init(void);
-void p9_trans_fd_exit(void);
 #endif /* NET_9P_H */
-- 
cgit v1.2.3


From 019641d1b57dff018972b23c95e898f9ff18222f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Wed, 3 Nov 2021 20:38:23 +0100
Subject: net/p9: load default transports
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that all transports are split into modules it may happen that no
transports are registered when v9fs_get_default_trans() is called.
When that is the case try to load more transports from modules.

Link: https://lkml.kernel.org/r/20211103193823.111007-5-linux@weissschuh.net
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
[Dominique: constify v9fs_get_trans_by_name argument as per patch1v2]
Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
---
 include/net/9p/transport.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
index 15a4e6a9dbf7..ff842f963071 100644
--- a/include/net/9p/transport.h
+++ b/include/net/9p/transport.h
@@ -54,7 +54,7 @@ struct p9_trans_module {
 
 void v9fs_register_trans(struct p9_trans_module *m);
 void v9fs_unregister_trans(struct p9_trans_module *m);
-struct p9_trans_module *v9fs_get_trans_by_name(char *s);
+struct p9_trans_module *v9fs_get_trans_by_name(const char *s);
 struct p9_trans_module *v9fs_get_default_trans(void);
 void v9fs_put_trans(struct p9_trans_module *m);
 
-- 
cgit v1.2.3


From 1ed147e29e505de819aaa5b57919c25348f72e1f Mon Sep 17 00:00:00 2001
From: Namjae Jeon <linkinjeon@kernel.org>
Date: Thu, 25 Nov 2021 21:01:11 +0900
Subject: exfat: move super block magic number to magic.h

Move exfat superblock magic number from local definition to magic.h.
It is also needed by userspace programs that call fstatfs().

Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 include/uapi/linux/magic.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 35687dcb1a42..8ab81ea13424 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -43,6 +43,7 @@
 #define MINIX3_SUPER_MAGIC	0x4d5a		/* minix v3 fs, 60 char names */
 
 #define MSDOS_SUPER_MAGIC	0x4d44		/* MD */
+#define EXFAT_SUPER_MAGIC	0x2011BAB0
 #define NCP_SUPER_MAGIC		0x564c		/* Guess, what 0x564c is :-) */
 #define NFS_SUPER_MAGIC		0x6969
 #define OCFS2_SUPER_MAGIC	0x7461636f
-- 
cgit v1.2.3


From a6b5a28eb56c3f4988f7ff5290b954ba296e309a Mon Sep 17 00:00:00 2001
From: Dave Wysochanski <dwysocha@redhat.com>
Date: Sat, 14 Nov 2020 13:43:54 -0500
Subject: nfs: Convert to new fscache volume/cookie API

Change the nfs filesystem to support fscache's indexing rewrite and
reenable caching in nfs.

The following changes have been made:

 (1) The fscache_netfs struct is no more, and there's no need to register
     the filesystem as a whole.

 (2) The session cookie is now an fscache_volume cookie, allocated with
     fscache_acquire_volume().  That takes three parameters: a string
     representing the "volume" in the index, a string naming the cache to
     use (or NULL) and a u64 that conveys coherency metadata for the
     volume.

     For nfs, I've made it render the volume name string as:

        "nfs,<ver>,<family>,<address>,<port>,<fsidH>,<fsidL>*<,param>[,<uniq>]"

 (3) The fscache_cookie_def is no more and needed information is passed
     directly to fscache_acquire_cookie().  The cache no longer calls back
     into the filesystem, but rather metadata changes are indicated at
     other times.

     fscache_acquire_cookie() is passed the same keying and coherency
     information as before.

 (4) fscache_enable/disable_cookie() have been removed.

     Call fscache_use_cookie() and fscache_unuse_cookie() when a file is
     opened or closed to prevent a cache file from being culled and to keep
     resources to hand that are needed to do I/O.

     If a file is opened for writing, we invalidate it with
     FSCACHE_INVAL_DIO_WRITE in lieu of doing writeback to the cache,
     thereby making it cease caching until all currently open files are
     closed.  This should give the same behaviour as the uptream code.
     Making the cache store local modifications isn't straightforward for
     NFS, so that's left for future patches.

 (5) fscache_invalidate() now needs to be given uptodate auxiliary data and
     a file size.  It also takes a flag to indicate if this was due to a
     DIO write.

 (6) Call nfs_fscache_invalidate() with FSCACHE_INVAL_DIO_WRITE on a file
     to which a DIO write is made.

 (7) Call fscache_note_page_release() from nfs_release_page().

 (8) Use a killable wait in nfs_vm_page_mkwrite() when waiting for
     PG_fscache to be cleared.

 (9) The functions to read and write data to/from the cache are stubbed out
     pending a conversion to use netfslib.

Changes
=======
ver #3:
 - Added missing =n fallback for nfs_fscache_release_file()[1][2].

ver #2:
 - Use gfpflags_allow_blocking() rather than using flag directly.
 - fscache_acquire_volume() now returns errors.
 - Remove NFS_INO_FSCACHE as it's no longer used.
 - Need to unuse a cookie on file-release, not inode-clear.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Co-developed-by: David Howells <dhowells@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Dave Wysochanski <dwysocha@redhat.com>
Acked-by: Jeff Layton <jlayton@kernel.org>
cc: Trond Myklebust <trond.myklebust@hammerspace.com>
cc: Anna Schumaker <anna.schumaker@netapp.com>
cc: linux-nfs@vger.kernel.org
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/202112100804.nksO8K4u-lkp@intel.com/ [1]
Link: https://lore.kernel.org/r/202112100957.2oEDT20W-lkp@intel.com/ [2]
Link: https://lore.kernel.org/r/163819668938.215744.14448852181937731615.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163906979003.143852.2601189243864854724.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967182112.1823006.7791504655391213379.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021575950.640689.12069642327533368467.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/nfs_fs.h    | 1 -
 include/linux/nfs_fs_sb.h | 9 ++-------
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 05f249f20f55..00835bacd236 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -275,7 +275,6 @@ struct nfs4_copy_state {
 #define NFS_INO_ACL_LRU_SET	(2)		/* Inode is on the LRU list */
 #define NFS_INO_INVALIDATING	(3)		/* inode is being invalidated */
 #define NFS_INO_FSCACHE		(5)		/* inode can be cached by FS-Cache */
-#define NFS_INO_FSCACHE_LOCK	(6)		/* FS-Cache cookie management lock */
 #define NFS_INO_FORCE_READDIR	(7)		/* force readdirplus */
 #define NFS_INO_LAYOUTCOMMIT	(9)		/* layoutcommit required */
 #define NFS_INO_LAYOUTCOMMITTING (10)		/* layoutcommit inflight */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 2a9acbfe00f0..77b2dba27bbb 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -120,11 +120,6 @@ struct nfs_client {
 	 * This is used to generate the mv0 callback address.
 	 */
 	char			cl_ipaddr[48];
-
-#ifdef CONFIG_NFS_FSCACHE
-	struct fscache_cookie	*fscache;	/* client index cache cookie */
-#endif
-
 	struct net		*cl_net;
 	struct list_head	pending_cb_stateids;
 };
@@ -194,8 +189,8 @@ struct nfs_server {
 	struct nfs_auth_info	auth_info;	/* parsed auth flavors */
 
 #ifdef CONFIG_NFS_FSCACHE
-	struct nfs_fscache_key	*fscache_key;	/* unique key for superblock */
-	struct fscache_cookie	*fscache;	/* superblock cookie */
+	struct fscache_volume	*fscache;	/* superblock cookie */
+	char			*fscache_uniq;	/* Uniquifier (or NULL) */
 #endif
 
 	u32			pnfs_blksize;	/* layout_blksize attr */
-- 
cgit v1.2.3


From 16f2f4e679cfdaa9552574484f104014908a76c6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 27 Aug 2021 15:19:34 +0100
Subject: nfs: Implement cache I/O by accessing the cache directly

Move NFS to using fscache DIO API instead of the old upstream I/O API as
that has been removed.  This is a stopgap solution as the intention is that
at sometime in the future, the cache will move to using larger blocks and
won't be able to store individual pages in order to deal with the potential
for data corruption due to the backing filesystem being able insert/remove
bridging blocks of zeros into its extent list[1].

NFS then reads and writes cache pages synchronously and one page at a time.

The preferred change would be to use the netfs lib, but the new I/O API can
be used directly.  It's just that as the cache now needs to track data for
itself, caching blocks may exceed page size...

This code is somewhat borrowed from my "fallback I/O" patchset[2].

Changes
=======
ver #3:
 - Restore lost =n fallback for nfs_fscache_release_page()[2].

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Dave Wysochanski <dwysocha@redhat.com>
Acked-by: Jeff Layton <jlayton@kernel.org>
cc: Trond Myklebust <trond.myklebust@hammerspace.com>
cc: Anna Schumaker <anna.schumaker@netapp.com>
cc: linux-nfs@vger.kernel.org
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/YO17ZNOcq+9PajfQ@mit.edu [1]
Link: https://lore.kernel.org/r/202112100957.2oEDT20W-lkp@intel.com/ [2]
Link: https://lore.kernel.org/r/163189108292.2509237.12615909591150927232.stgit@warthog.procyon.org.uk/ [2]
Link: https://lore.kernel.org/r/163906981318.143852.17220018647843475985.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163967184451.1823006.6450645559828329590.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/164021577632.640689.11069627070150063812.stgit@warthog.procyon.org.uk/ # v4
---
 include/linux/fscache.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 7bd35f60d19a..ede50406bcb0 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -168,6 +168,7 @@ extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool);
 extern void __fscache_resize_cookie(struct fscache_cookie *, loff_t);
 extern void __fscache_invalidate(struct fscache_cookie *, const void *, loff_t, unsigned int);
 extern int __fscache_begin_read_operation(struct netfs_cache_resources *, struct fscache_cookie *);
+extern int __fscache_begin_write_operation(struct netfs_cache_resources *, struct fscache_cookie *);
 
 extern void __fscache_write_to_cache(struct fscache_cookie *, struct address_space *,
 				     loff_t, size_t, loff_t, netfs_io_terminated_t, void *,
@@ -499,6 +500,33 @@ int fscache_read(struct netfs_cache_resources *cres,
 			 term_func, term_func_priv);
 }
 
+/**
+ * fscache_begin_write_operation - Begin a write operation for the netfs lib
+ * @cres: The cache resources for the write being performed
+ * @cookie: The cookie representing the cache object
+ *
+ * Begin a write operation on behalf of the netfs helper library.  @cres
+ * indicates the cache resources to which the operation state should be
+ * attached; @cookie indicates the cache object that will be accessed.
+ *
+ * @cres->inval_counter is set from @cookie->inval_counter for comparison at
+ * the end of the operation.  This allows invalidation during the operation to
+ * be detected by the caller.
+ *
+ * Returns:
+ * * 0		- Success
+ * * -ENOBUFS	- No caching available
+ * * Other error code from the cache, such as -ENOMEM.
+ */
+static inline
+int fscache_begin_write_operation(struct netfs_cache_resources *cres,
+				  struct fscache_cookie *cookie)
+{
+	if (fscache_cookie_enabled(cookie))
+		return __fscache_begin_write_operation(cres, cookie);
+	return -ENOBUFS;
+}
+
 /**
  * fscache_write - Start a write to the cache.
  * @cres: The cache resources to use
-- 
cgit v1.2.3


From dc6c6fb3d639756a532bcc47d4a9bf9f3965881b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 9 Jan 2022 13:26:51 -0500
Subject: SUNRPC: Fix sockaddr handling in the svc_xprt_create_error trace
 point

While testing, I got an unexpected KASAN splat:

Jan 08 13:50:27 oracle-102.nfsv4.dev kernel: BUG: KASAN: stack-out-of-bounds in trace_event_raw_event_svc_xprt_create_err+0x190/0x210 [sunrpc]
Jan 08 13:50:27 oracle-102.nfsv4.dev kernel: Read of size 28 at addr ffffc9000008f728 by task mount.nfs/4628

The memcpy() in the TP_fast_assign section of this trace point
copies the size of the destination buffer in order that the buffer
won't be overrun.

In other similar trace points, the source buffer for this memcpy is
a "struct sockaddr_storage" so the actual length of the source
buffer is always long enough to prevent the memcpy from reading
uninitialized or unallocated memory.

However, for this trace point, the source buffer can be as small as
a "struct sockaddr_in". For AF_INET sockaddrs, the memcpy() reads
memory that follows the source buffer, which is not always valid
memory.

To avoid copying past the end of the passed-in sockaddr, make the
source address's length available to the memcpy(). It would be a
little nicer if the tracing infrastructure was more friendly about
storing socket addresses that are not AF_INET, but I could not find
a way to make printk("%pIS") work with a dynamic array.

Reported-by: KASAN
Fixes: 4b8f380e46e4 ("SUNRPC: Tracepoint to record errors in svc_xpo_create()")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/trace/events/sunrpc.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 684cc0e322fa..1c0a288f6a5c 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1744,10 +1744,11 @@ TRACE_EVENT(svc_xprt_create_err,
 		const char *program,
 		const char *protocol,
 		struct sockaddr *sap,
+		size_t salen,
 		const struct svc_xprt *xprt
 	),
 
-	TP_ARGS(program, protocol, sap, xprt),
+	TP_ARGS(program, protocol, sap, salen, xprt),
 
 	TP_STRUCT__entry(
 		__field(long, error)
@@ -1760,7 +1761,7 @@ TRACE_EVENT(svc_xprt_create_err,
 		__entry->error = PTR_ERR(xprt);
 		__assign_str(program, program);
 		__assign_str(protocol, protocol);
-		memcpy(__entry->addr, sap, sizeof(__entry->addr));
+		memcpy(__entry->addr, sap, min(salen, sizeof(__entry->addr)));
 	),
 
 	TP_printk("addr=%pISpc program=%s protocol=%s error=%ld",
-- 
cgit v1.2.3


From 16720861675393a35974532b3c837d9fd7bfe08c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sat, 8 Jan 2022 16:59:54 -0500
Subject: SUNRPC: Fix sockaddr handling in svcsock_accept_class trace points

Avoid potentially hazardous memory copying and the needless use of
"%pIS" -- in the kernel, an RPC service listener is always bound to
ANYADDR. Having the network namespace is helpful when recording
errors, though.

Fixes: a0469f46faab ("SUNRPC: Replace dprintk call sites in TCP state change callouts")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/trace/events/sunrpc.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 1c0a288f6a5c..1e566ac4b812 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -2125,17 +2125,17 @@ DECLARE_EVENT_CLASS(svcsock_accept_class,
 	TP_STRUCT__entry(
 		__field(long, status)
 		__string(service, service)
-		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
+		__field(unsigned int, netns_ino)
 	),
 
 	TP_fast_assign(
 		__entry->status = status;
 		__assign_str(service, service);
-		memcpy(__entry->addr, &xprt->xpt_local, sizeof(__entry->addr));
+		__entry->netns_ino = xprt->xpt_net->ns.inum;
 	),
 
-	TP_printk("listener=%pISpc service=%s status=%ld",
-		__entry->addr, __get_str(service), __entry->status
+	TP_printk("addr=listener service=%s status=%ld",
+		__get_str(service), __entry->status
 	)
 );
 
-- 
cgit v1.2.3


From bbc605cdb1e15aafaec899fedc385dc75dddac0e Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Mon, 13 Dec 2021 14:56:18 +0100
Subject: ext4: implement support for get/set fs label

Implement support for FS_IOC_GETFSLABEL and FS_IOC_SETFSLABEL ioctls for
online reading and setting of file system label.

ext4_ioctl_getlabel() is simple, just get the label from the primary
superblock. This might not be the first sb on the file system if
'sb=' mount option is used.

In ext4_ioctl_setlabel() we update what ext4 currently views as a
primary superblock and then proceed to update backup superblocks. There
are two caveats:
 - the primary superblock might not be the first superblock and so it
   might not be the one used by userspace tools if read directly
   off the disk.
 - because the primary superblock might not be the first superblock we
   potentialy have to update it as part of backup superblock update.
   However the first sb location is a bit more complicated than the rest
   so we have to account for that.

The superblock modification is created generic enough so the
infrastructure can be used for other potential superblock modification
operations, such as chaning UUID.

Tested with generic/492 with various configurations. I also checked the
behavior with 'sb=' mount options, including very large file systems
with and without sparse_super/sparse_super2.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Link: https://lore.kernel.org/r/20211213135618.43303-1-lczerner@redhat.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/trace/events/ext4.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 0ea36b2b0662..19e957b7f941 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2837,6 +2837,29 @@ TRACE_EVENT(ext4_fc_track_range,
 		      __entry->end)
 	);
 
+TRACE_EVENT(ext4_update_sb,
+	TP_PROTO(struct super_block *sb, ext4_fsblk_t fsblk,
+		 unsigned int flags),
+
+	TP_ARGS(sb, fsblk, flags),
+
+	TP_STRUCT__entry(
+		__field(dev_t,		dev)
+		__field(ext4_fsblk_t,	fsblk)
+		__field(unsigned int,	flags)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= sb->s_dev;
+		__entry->fsblk	= fsblk;
+		__entry->flags	= flags;
+	),
+
+	TP_printk("dev %d,%d fsblk %llu flags %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->fsblk, __entry->flags)
+);
+
 #endif /* _TRACE_EXT4_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From a59466ee91aaa9d43889a4c51e01de087d188448 Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Tue, 11 Jan 2022 10:28:47 +0000
Subject: memblock: Remove #ifdef __KERNEL__ from memblock.h

memblock.h is not a uAPI header, so __KERNEL__ guard can be deleted

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/20220111102847.673746-1-karolinadrobnik@gmail.com
---
 include/linux/memblock.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 9dc7cb239d21..50ad19662a32 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -1,7 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 #ifndef _LINUX_MEMBLOCK_H
 #define _LINUX_MEMBLOCK_H
-#ifdef __KERNEL__
 
 /*
  * Logical memory blocks.
@@ -605,6 +604,5 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end)
 }
 #endif
 
-#endif /* __KERNEL__ */
 
 #endif /* _LINUX_MEMBLOCK_H */
-- 
cgit v1.2.3


From 16f035d9e264d95d61d5f4056bb00d8169a7a3d1 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Fri, 7 Jan 2022 01:13:02 +0100
Subject: sections: Fix __is_kernel() to include init ranges

With CONFIG_KALLSYMS_ALL=y, the function is_ksym_addr() is used to
determine if a symbol is from inside the kernel range. For that the
given symbol address is checked if it's inside the _stext to _end range.

Although this is correct, some architectures (e.g. parisc) may have the
init area before the _stext address and as such the check in
is_ksym_addr() fails.  By extending the range check to include the init
section, __is_kernel() will now detect symbols in this range as well.

This fixes an issue on parisc where addresses of kernel functions in
init sections aren't resolved to their symbol names.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 include/asm-generic/sections.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 1dfadb2e878d..00566b1fd699 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -193,12 +193,16 @@ static inline bool __is_kernel_text(unsigned long addr)
  * @addr: address to check
  *
  * Returns: true if the address is located in the kernel range, false otherwise.
- * Note: an internal helper, only check the range of _stext to _end.
+ * Note: an internal helper, check the range of _stext to _end,
+ *       and range from __init_begin to __init_end, which can be outside
+ *       of the _stext to _end range.
  */
 static inline bool __is_kernel(unsigned long addr)
 {
-	return addr >= (unsigned long)_stext &&
-	       addr < (unsigned long)_end;
+	return ((addr >= (unsigned long)_stext &&
+	         addr < (unsigned long)_end) ||
+		(addr >= (unsigned long)__init_begin &&
+		 addr < (unsigned long)__init_end));
 }
 
 #endif /* _ASM_GENERIC_SECTIONS_H_ */
-- 
cgit v1.2.3


From 500b55b05d0a21c4adddf4c3b29ee6f32b502046 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 21 Dec 2021 10:45:07 -0600
Subject: PCI: Work around Intel I210 ROM BAR overlap defect

Per PCIe r5, sec 7.5.1.2.4, a device must not claim accesses to its
Expansion ROM unless both the Memory Space Enable and the Expansion ROM
Enable bit are set.  But apparently some Intel I210 NICs don't work
correctly if the ROM BAR overlaps another BAR, even if the Expansion ROM is
disabled.

Michael reported that on a Kontron SMARC-sAL28 ARM64 system with U-Boot
v2021.01-rc3, the ROM BAR overlaps BAR 3, and networking doesn't work at
all:

  BAR 0: 0x40000000 (32-bit, non-prefetchable) [size=1M]
  BAR 3: 0x40200000 (32-bit, non-prefetchable) [size=16K]
  ROM:   0x40200000 (disabled) [size=1M]

  NETDEV WATCHDOG: enP2p1s0 (igb): transmit queue 0 timed out
  Hardware name: Kontron SMARC-sAL28 (Single PHY) on SMARC Eval 2.0 carrier (DT)
  igb 0002:01:00.0 enP2p1s0: Reset adapter

Previously, pci_std_update_resource() wrote the assigned ROM address to the
BAR only when the ROM was enabled.  This meant that the I210 ROM BAR could
be left with an address assigned by firmware, which might overlap with
other BARs.

Quirk these I210 devices so pci_std_update_resource() always writes the
assigned address to the ROM BAR, whether or not the ROM is enabled.

Link: https://lore.kernel.org/r/20211223163754.GA1267351@bhelgaas
Link: https://lore.kernel.org/r/20201230185317.30915-1-michael@walle.cc
Link: https://bugzilla.kernel.org/show_bug.cgi?id=211105
Reported-by: Michael Walle <michael@walle.cc>
Tested-by: Michael Walle <michael@walle.cc>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 18a75c8e615c..51c4a063f489 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -455,6 +455,7 @@ struct pci_dev {
 	unsigned int	link_active_reporting:1;/* Device capable of reporting link active */
 	unsigned int	no_vf_scan:1;		/* Don't scan for VFs after IOV enablement */
 	unsigned int	no_command_memory:1;	/* No PCI_COMMAND_MEMORY */
+	unsigned int	rom_bar_overlap:1;	/* ROM BAR disable broken */
 	pci_dev_flags_t dev_flags;
 	atomic_t	enable_cnt;	/* pci_enable_device has been called */
 
-- 
cgit v1.2.3


From 1d3cfc2835c1754d19a743dc346a9e58cf0c07c0 Mon Sep 17 00:00:00 2001
From: Kelvin Cao <kelvin.cao@microchip.com>
Date: Thu, 23 Dec 2021 17:23:33 -0800
Subject: ntb_hw_switchtec: Remove code for disabling ID protection

ID protection is a firmware setting for NT window access control. With
it enabled, only the posted requests with requester IDs in the requester
ID table will be allowed to access the NT windows. Otherwise all posted
requests are allowed. Normally user will configure it statically via the
Switchtec config file, and it will take effect when the firmware boots
up. The driver can also toggle the ID protection setting dynamically,
which will overwrite the static setting in the Switchtec config file as
a side effect.

Currently, the driver disables the ID protection. However, it's not
necessary to disable the ID protection at the driver level as the driver
has already configured the proper requester IDs in the requester ID
table to allow the corresponding posted requests to hit the NT windows.
Remove the code that disables the ID protection to make the static
setting prevail.

Note: ID protection is not applicable to non-posted requests.

Signed-off-by: Kelvin Cao <kelvin.cao@microchip.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 include/linux/switchtec.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h
index be24056ac00f..48fabe36509e 100644
--- a/include/linux/switchtec.h
+++ b/include/linux/switchtec.h
@@ -337,8 +337,6 @@ enum {
 	NTB_CTRL_REQ_ID_EN = 1 << 0,
 
 	NTB_CTRL_LUT_EN = 1 << 0,
-
-	NTB_PART_CTRL_ID_PROT_DIS = 1 << 0,
 };
 
 struct ntb_ctrl_regs {
-- 
cgit v1.2.3


From 9b7a4de9f126d8c8d59052088213990159417d5b Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Thu, 16 Dec 2021 10:45:03 +0100
Subject: drm/amdkfd: make SPDX License expression more sound

Commit b5f57384805a ("drm/amdkfd: Add sysfs bitfields and enums to uAPI")
adds include/uapi/linux/kfd_sysfs.h with the "GPL-2.0 OR MIT WITH
Linux-syscall-note" SPDX-License expression.

The command ./scripts/spdxcheck.py warns:

  include/uapi/linux/kfd_sysfs.h: 1:48 Exception not valid for license MIT: Linux-syscall-note

For a uapi header, the file under GPLv2 License must be combined with the
Linux-syscall-note, but combining the MIT License with the
Linux-syscall-note makes no sense, as the note provides an exception for
GPL-licensed code, not for permissively licensed code.

So, reorganize the SPDX expression to only combine the note with the GPL
License condition. This makes spdxcheck happy again.

Fixes: b5f57384805a ("drm/amdkfd: Add sysfs bitfields and enums to uAPI")
Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Reviewed-by: kstewart@linuxfoundation.org
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/linux/kfd_sysfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h
index e1fb78b4bf09..3e330f368917 100644
--- a/include/uapi/linux/kfd_sysfs.h
+++ b/include/uapi/linux/kfd_sysfs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 OR MIT WITH Linux-syscall-note */
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
 /*
  * Copyright 2021 Advanced Micro Devices, Inc.
  *
-- 
cgit v1.2.3


From e6435f1e02f410e3507f02a37c0fbb17971ddc7c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 6 Dec 2021 15:54:04 +0000
Subject: fscache: Add a tracepoint for cookie use/unuse

Add a tracepoint to track fscache_use/unuse_cookie().

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/164021588628.640689.12942919367404043608.stgit@warthog.procyon.org.uk/ # v4
---
 include/trace/events/fscache.h | 44 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index 1594aefadeac..cb3fb337e880 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -71,6 +71,12 @@ enum fscache_cookie_trace {
 	fscache_cookie_see_work,
 };
 
+enum fscache_active_trace {
+	fscache_active_use,
+	fscache_active_use_modify,
+	fscache_active_unuse,
+};
+
 enum fscache_access_trace {
 	fscache_access_acquire_volume,
 	fscache_access_acquire_volume_end,
@@ -146,6 +152,11 @@ enum fscache_access_trace {
 	EM(fscache_cookie_see_withdraw,		"-   x-wth")		\
 	E_(fscache_cookie_see_work,		"-   work ")
 
+#define fscache_active_traces		\
+	EM(fscache_active_use,			"USE          ")	\
+	EM(fscache_active_use_modify,		"USE-m        ")	\
+	E_(fscache_active_unuse,		"UNUSE        ")
+
 #define fscache_access_traces		\
 	EM(fscache_access_acquire_volume,	"BEGIN acq_vol")	\
 	EM(fscache_access_acquire_volume_end,	"END   acq_vol")	\
@@ -264,6 +275,39 @@ TRACE_EVENT(fscache_cookie,
 		      __entry->ref)
 	    );
 
+TRACE_EVENT(fscache_active,
+	    TP_PROTO(unsigned int cookie_debug_id,
+		     int ref,
+		     int n_active,
+		     int n_accesses,
+		     enum fscache_active_trace why),
+
+	    TP_ARGS(cookie_debug_id, ref, n_active, n_accesses, why),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		cookie		)
+		    __field(int,			ref		)
+		    __field(int,			n_active	)
+		    __field(int,			n_accesses	)
+		    __field(enum fscache_active_trace,	why		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie	= cookie_debug_id;
+		    __entry->ref	= ref;
+		    __entry->n_active	= n_active;
+		    __entry->n_accesses	= n_accesses;
+		    __entry->why	= why;
+			   ),
+
+	    TP_printk("c=%08x %s r=%d a=%d c=%d",
+		      __entry->cookie,
+		      __print_symbolic(__entry->why, fscache_active_traces),
+		      __entry->ref,
+		      __entry->n_accesses,
+		      __entry->n_active)
+	    );
+
 TRACE_EVENT(fscache_access_cache,
 	    TP_PROTO(unsigned int cache_debug_id,
 		     int ref,
-- 
cgit v1.2.3


From b1ae6dc41eaaa98bb75671e0f3665bfda248c3e7 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 5 Jan 2022 13:55:12 -0800
Subject: module: add in-kernel support for decompressing

Current scheme of having userspace decompress kernel modules before
loading them into the kernel runs afoul of LoadPin security policy, as
it loses link between the source of kernel module on the disk and binary
blob that is being loaded into the kernel. To solve this issue let's
implement decompression in kernel, so that we can pass a file descriptor
of compressed module file into finit_module() which will keep LoadPin
happy.

To let userspace know what compression/decompression scheme kernel
supports it will create /sys/module/compression attribute. kmod can read
this attribute and decide if it can pass compressed file to
finit_module(). New MODULE_INIT_COMPRESSED_DATA flag indicates that the
kernel should attempt to decompress the data read from file descriptor
prior to trying load the module.

To simplify things kernel will only implement single decompression
method matching compression method selected when generating modules.
This patch implements gzip and xz; more can be added later,

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
 include/uapi/linux/module.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/module.h b/include/uapi/linux/module.h
index 50d98ec5e866..03a33ffffcba 100644
--- a/include/uapi/linux/module.h
+++ b/include/uapi/linux/module.h
@@ -5,5 +5,6 @@
 /* Flags for sys_finit_module: */
 #define MODULE_INIT_IGNORE_MODVERSIONS	1
 #define MODULE_INIT_IGNORE_VERMAGIC	2
+#define MODULE_INIT_COMPRESSED_FILE	4
 
 #endif /* _UAPI_LINUX_MODULE_H */
-- 
cgit v1.2.3


From ca321ec74322e3c49552fc1ffc80b42d0dbf1a84 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 8 Jan 2022 15:06:57 +0100
Subject: module.h: allow #define strings to work with MODULE_IMPORT_NS

The MODULE_IMPORT_NS() macro does not allow defined strings to work
properly with it, so add a layer of indirection to allow this to happen.

Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Matthias Maennich <maennich@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Matthias Maennich <maennich@google.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
 include/linux/module.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/module.h b/include/linux/module.h
index c9f1200b2312..f4338235ed2c 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -290,7 +290,8 @@ extern typeof(name) __mod_##type##__##name##_device_table		\
  * files require multiple MODULE_FIRMWARE() specifiers */
 #define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware)
 
-#define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns)
+#define _MODULE_IMPORT_NS(ns)	MODULE_INFO(import_ns, #ns)
+#define MODULE_IMPORT_NS(ns)	_MODULE_IMPORT_NS(ns)
 
 struct notifier_block;
 
-- 
cgit v1.2.3


From cb963a19d99fc42d9abf4238968ef85fcc2ef3e3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 10 Jan 2022 01:47:50 -0800
Subject: net: sched: do not allocate a tracker in tcf_exts_init()

While struct tcf_exts has a net pointer, it is not refcounted
until tcf_exts_get_net() is called.

Fixes: dbdcda634ce3 ("net: sched: add netns refcount tracker to struct tcf_exts")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Link: https://lore.kernel.org/r/20220110094750.236478-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/pkt_cls.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index ebef45e821af..676cb8ea9e15 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -218,8 +218,10 @@ static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net,
 #ifdef CONFIG_NET_CLS_ACT
 	exts->type = 0;
 	exts->nr_actions = 0;
+	/* Note: we do not own yet a reference on net.
+	 * This reference might be taken later from tcf_exts_get_net().
+	 */
 	exts->net = net;
-	netns_tracker_alloc(net, &exts->ns_tracker, GFP_KERNEL);
 	exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
 				GFP_KERNEL);
 	if (!exts->actions)
-- 
cgit v1.2.3


From 869b6ca39c08c5b10eeb29d4b3c4bc433bf8ba5e Mon Sep 17 00:00:00 2001
From: Huang Yiwei <quic_hyiwei@quicinc.com>
Date: Mon, 22 Nov 2021 13:05:09 +0800
Subject: dt-bindings: mailbox: Add more protocol and client ID

Add more protocol and client ID which can be used in device
tree properties.

Signed-off-by: Huang Yiwei <quic_hyiwei@quicinc.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 include/dt-bindings/mailbox/qcom-ipcc.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/mailbox/qcom-ipcc.h b/include/dt-bindings/mailbox/qcom-ipcc.h
index eb91a6c05b71..9296d0bb5f34 100644
--- a/include/dt-bindings/mailbox/qcom-ipcc.h
+++ b/include/dt-bindings/mailbox/qcom-ipcc.h
@@ -8,6 +8,7 @@
 
 /* Signal IDs for MPROC protocol */
 #define IPCC_MPROC_SIGNAL_GLINK_QMP	0
+#define IPCC_MPROC_SIGNAL_TZ		1
 #define IPCC_MPROC_SIGNAL_SMP2P		2
 #define IPCC_MPROC_SIGNAL_PING		3
 
@@ -29,6 +30,7 @@
 #define IPCC_CLIENT_PCIE1		14
 #define IPCC_CLIENT_PCIE2		15
 #define IPCC_CLIENT_SPSS		16
+#define IPCC_CLIENT_TME			23
 #define IPCC_CLIENT_WPSS		24
 
 #endif
-- 
cgit v1.2.3


From 831c1ae725f7d2f8f858b0840692b48e75b49331 Mon Sep 17 00:00:00 2001
From: Sunil Muthuswamy <sunilmut@microsoft.com>
Date: Wed, 5 Jan 2022 11:32:35 -0800
Subject: PCI: hv: Make the code arch neutral by adding arch specific
 interfaces

Encapsulate arch dependencies in Hyper-V vPCI through a set of
arch-dependent interfaces. Adding these arch specific interfaces will
allow for an implementation for other architectures, such as arm64.

There are no functional changes expected from this patch.

Link: https://lore.kernel.org/r/1641411156-31705-2-git-send-email-sunilmut@linux.microsoft.com
Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Boqun Feng <boqun.feng@gmail.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
 include/asm-generic/hyperv-tlfs.h | 33 ---------------------------------
 1 file changed, 33 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index 8ed6733d5146..8f97c2927bee 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -540,39 +540,6 @@ enum hv_interrupt_source {
 	HV_INTERRUPT_SOURCE_IOAPIC,
 };
 
-union hv_msi_address_register {
-	u32 as_uint32;
-	struct {
-		u32 reserved1:2;
-		u32 destination_mode:1;
-		u32 redirection_hint:1;
-		u32 reserved2:8;
-		u32 destination_id:8;
-		u32 msi_base:12;
-	};
-} __packed;
-
-union hv_msi_data_register {
-	u32 as_uint32;
-	struct {
-		u32 vector:8;
-		u32 delivery_mode:3;
-		u32 reserved1:3;
-		u32 level_assert:1;
-		u32 trigger_mode:1;
-		u32 reserved2:16;
-	};
-} __packed;
-
-/* HvRetargetDeviceInterrupt hypercall */
-union hv_msi_entry {
-	u64 as_uint64;
-	struct {
-		union hv_msi_address_register address;
-		union hv_msi_data_register data;
-	} __packed;
-};
-
 union hv_ioapic_rte {
 	u64 as_uint64;
 
-- 
cgit v1.2.3


From 3f4b32511a77bc5a05cfbf26fec94c4e1b1cf46a Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Fri, 7 Jan 2022 18:17:18 +0000
Subject: PM: core: Remove DEFINE_UNIVERSAL_DEV_PM_OPS() macro

The deprecated UNIVERSAL_DEV_PM_OPS() macro uses the provided callbacks
for both runtime PM and system sleep, which is very likely to be a
mistake, as a system sleep can be triggered while a given device is
already PM-suspended, which would cause the suspend callback to be
called twice.

The amount of users of UNIVERSAL_DEV_PM_OPS() is also tiny (16
occurences) compared to the number of places where
SET_SYSTEM_SLEEP_PM_OPS() is used with pm_runtime_force_suspend() and
pm_runtime_force_resume(), which makes me think that none of these cases
are actually valid.

As the new macro DEFINE_UNIVERSAL_DEV_PM_OPS() which was introduced to
replace UNIVERSAL_DEV_PM_OPS() is currently unused, remove it before
someone starts to use it in yet another invalid case.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm.h | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index e1e9402180b9..02f059d814bb 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -366,6 +366,12 @@ static const struct dev_pm_ops name = { \
 	SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
 }
 
+/* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */
+#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
+const struct dev_pm_ops __maybe_unused name = { \
+	SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+}
+
 /*
  * Use this for defining a set of PM operations to be used in all situations
  * (system suspend, hibernation or runtime PM).
@@ -378,20 +384,9 @@ static const struct dev_pm_ops name = { \
  * suspend and "early" resume callback pointers, .suspend_late() and
  * .resume_early(), to the same routines as .runtime_suspend() and
  * .runtime_resume(), respectively (and analogously for hibernation).
+ *
+ * Deprecated. You most likely don't want this macro.
  */
-#define DEFINE_UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
-static const struct dev_pm_ops name = { \
-	SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
-	RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
-}
-
-/* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */
-#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
-const struct dev_pm_ops __maybe_unused name = { \
-	SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
-}
-
-/* Deprecated. Use DEFINE_UNIVERSAL_DEV_PM_OPS() instead. */
 #define UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
 const struct dev_pm_ops __maybe_unused name = { \
 	SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
-- 
cgit v1.2.3


From 52cc1d7f9786d2be44a3ab9b5b48416a7618e713 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Fri, 7 Jan 2022 18:17:19 +0000
Subject: PM: core: Remove static qualifier in DEFINE_SIMPLE_DEV_PM_OPS macro

Keep this macro in line with the other ones. This makes it possible to
use them in the cases where the underlying dev_pm_ops structure is
exported.

Restore the "static" qualifier in the two drivers where the
DEFINE_SIMPLE_DEV_PM_OPS macro was used.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 02f059d814bb..8e13387e70ec 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -362,7 +362,7 @@ struct dev_pm_ops {
  * to RAM and hibernation.
  */
 #define DEFINE_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
-static const struct dev_pm_ops name = { \
+const struct dev_pm_ops name = { \
 	SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
 }
 
-- 
cgit v1.2.3


From 0ae101fdd3297b7165755340e05386f1e1379709 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Fri, 7 Jan 2022 18:17:20 +0000
Subject: PM: core: Add EXPORT[_GPL]_SIMPLE_DEV_PM_OPS macros

These macros are defined conditionally, according to CONFIG_PM:
- if CONFIG_PM is enabled, these macros resolve to
  DEFINE_SIMPLE_DEV_PM_OPS(), and the dev_pm_ops symbol will be
  exported.

- if CONFIG_PM is disabled, these macros will result in a dummy static
  dev_pm_ops to be created with the __maybe_unused flag. The dev_pm_ops
  will then be discarded by the compiler, along with the provided
  callback functions if they are not used anywhere else.

In the second case, the symbol is not exported, which should be
perfectly fine - users of the symbol should all use the pm_ptr() or
pm_sleep_ptr() macro, so the dev_pm_ops marked as "extern" in the
client's code will never be accessed.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm.h | 35 ++++++++++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 8e13387e70ec..8279af2c538a 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -8,6 +8,7 @@
 #ifndef _LINUX_PM_H
 #define _LINUX_PM_H
 
+#include <linux/export.h>
 #include <linux/list.h>
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
@@ -357,14 +358,42 @@ struct dev_pm_ops {
 #define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn)
 #endif
 
+#define _DEFINE_DEV_PM_OPS(name, \
+			   suspend_fn, resume_fn, \
+			   runtime_suspend_fn, runtime_resume_fn, idle_fn) \
+const struct dev_pm_ops name = { \
+	SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+	RUNTIME_PM_OPS(runtime_suspend_fn, runtime_resume_fn, idle_fn) \
+}
+
+#ifdef CONFIG_PM
+#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
+			   runtime_resume_fn, idle_fn, sec) \
+	_DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
+			   runtime_resume_fn, idle_fn); \
+	_EXPORT_SYMBOL(name, sec)
+#else
+#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
+			   runtime_resume_fn, idle_fn, sec) \
+static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \
+					 resume_fn, runtime_suspend_fn, \
+					 runtime_resume_fn, idle_fn)
+#endif
+
 /*
  * Use this if you want to use the same suspend and resume callbacks for suspend
  * to RAM and hibernation.
+ *
+ * If the underlying dev_pm_ops struct symbol has to be exported, use
+ * EXPORT_SIMPLE_DEV_PM_OPS() or EXPORT_GPL_SIMPLE_DEV_PM_OPS() instead.
  */
 #define DEFINE_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
-const struct dev_pm_ops name = { \
-	SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
-}
+	_DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL)
+
+#define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
+	_EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "")
+#define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
+	_EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl")
 
 /* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */
 #define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
-- 
cgit v1.2.3


From 9d8619190031af0a314bee865262d8975473e4dd Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Fri, 7 Jan 2022 18:17:21 +0000
Subject: PM: runtime: Add DEFINE_RUNTIME_DEV_PM_OPS() macro

A lot of drivers create a dev_pm_ops struct with the system sleep
suspend/resume callbacks set to pm_runtime_force_suspend() and
pm_runtime_force_resume().

These drivers can now use the DEFINE_RUNTIME_DEV_PM_OPS() macro, which
will use pm_runtime_force_{suspend,resume}() as the system sleep
callbacks, while having the same dead code removal characteristic that
is already provided by DEFINE_SIMPLE_DEV_PM_OPS().

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm.h         |  3 ++-
 include/linux/pm_runtime.h | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 8279af2c538a..f7d2be686359 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -414,7 +414,8 @@ const struct dev_pm_ops __maybe_unused name = { \
  * .resume_early(), to the same routines as .runtime_suspend() and
  * .runtime_resume(), respectively (and analogously for hibernation).
  *
- * Deprecated. You most likely don't want this macro.
+ * Deprecated. You most likely don't want this macro. Use
+ * DEFINE_RUNTIME_DEV_PM_OPS() instead.
  */
 #define UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
 const struct dev_pm_ops __maybe_unused name = { \
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 016de5776b6d..4af454d29281 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -22,6 +22,20 @@
 					    usage_count */
 #define RPM_AUTO		0x08	/* Use autosuspend_delay */
 
+/*
+ * Use this for defining a set of PM operations to be used in all situations
+ * (system suspend, hibernation or runtime PM).
+ *
+ * Note that the behaviour differs from the deprecated UNIVERSAL_DEV_PM_OPS()
+ * macro, which uses the provided callbacks for both runtime PM and system
+ * sleep, while DEFINE_RUNTIME_DEV_PM_OPS() uses pm_runtime_force_suspend()
+ * and pm_runtime_force_resume() for its system sleep callbacks.
+ */
+#define DEFINE_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
+	_DEFINE_DEV_PM_OPS(name, pm_runtime_force_suspend, \
+			   pm_runtime_force_resume, suspend_fn, \
+			   resume_fn, idle_fn)
+
 #ifdef CONFIG_PM
 extern struct workqueue_struct *pm_wq;
 
-- 
cgit v1.2.3


From d59ff7d9d84b03d22c5107f794e28fc8e1fce3a6 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Fri, 7 Jan 2022 18:17:22 +0000
Subject: PM: runtime: Add EXPORT[_GPL]_RUNTIME_DEV_PM_OPS macros

Similar to EXPORT[_GPL]_SIMPLE_DEV_PM_OPS, but for users with runtime-PM
suspend/resume callbacks.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_runtime.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 4af454d29281..9f09601c465a 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -30,12 +30,22 @@
  * macro, which uses the provided callbacks for both runtime PM and system
  * sleep, while DEFINE_RUNTIME_DEV_PM_OPS() uses pm_runtime_force_suspend()
  * and pm_runtime_force_resume() for its system sleep callbacks.
+ *
+ * If the underlying dev_pm_ops struct symbol has to be exported, use
+ * EXPORT_RUNTIME_DEV_PM_OPS() or EXPORT_GPL_RUNTIME_DEV_PM_OPS() instead.
  */
 #define DEFINE_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
 	_DEFINE_DEV_PM_OPS(name, pm_runtime_force_suspend, \
 			   pm_runtime_force_resume, suspend_fn, \
 			   resume_fn, idle_fn)
 
+#define EXPORT_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
+	_EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \
+			   suspend_fn, resume_fn, idle_fn, "")
+#define EXPORT_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
+	_EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \
+			   suspend_fn, resume_fn, idle_fn, "_gpl")
+
 #ifdef CONFIG_PM
 extern struct workqueue_struct *pm_wq;
 
-- 
cgit v1.2.3


From 2d7c86a8f9cdce1408c4f3c69d94d007eff2f179 Mon Sep 17 00:00:00 2001
From: Venky Shankar <vshankar@redhat.com>
Date: Wed, 14 Jul 2021 15:35:50 +0530
Subject: libceph: generalize addr/ip parsing based on delimiter

... and remove hardcoded function name in ceph_parse_ips().

[ idryomov: delim parameter, drop CEPH_ADDR_PARSE_DEFAULT_DELIM ]

Signed-off-by: Venky Shankar <vshankar@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/libceph.h   | 2 +-
 include/linux/ceph/messenger.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 409d8c29bc4f..c72285d8594e 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -301,7 +301,7 @@ struct fs_parameter;
 struct fc_log;
 struct ceph_options *ceph_alloc_options(void);
 int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt,
-		       struct fc_log *l);
+		       struct fc_log *l, char delim);
 int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
 		     struct fc_log *l);
 int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 0e6e9ad3c3bf..ff99ce094cfa 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -532,7 +532,7 @@ extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr);
 
 extern int ceph_parse_ips(const char *c, const char *end,
 			  struct ceph_entity_addr *addr,
-			  int max_count, int *count);
+			  int max_count, int *count, char delim);
 
 extern int ceph_msgr_init(void);
 extern void ceph_msgr_exit(void);
-- 
cgit v1.2.3


From 4153c7fc937a2afa077dbdb9fe3189b9981f423c Mon Sep 17 00:00:00 2001
From: Venky Shankar <vshankar@redhat.com>
Date: Wed, 14 Jul 2021 15:35:51 +0530
Subject: libceph: rename parse_fsid() to ceph_parse_fsid() and export

... as it is too generic. also, use __func__ when logging
rather than hardcoding the function name.

Signed-off-by: Venky Shankar <vshankar@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/libceph.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index c72285d8594e..644f224eccf7 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -296,6 +296,7 @@ extern bool libceph_compatible(void *data);
 extern const char *ceph_msg_type_name(int type);
 extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
 extern void *ceph_kvmalloc(size_t size, gfp_t flags);
+extern int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid);
 
 struct fs_parameter;
 struct fc_log;
-- 
cgit v1.2.3


From a0b3a15eab6bc2e90008460b646d53e7d9dcdbbb Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 10 Jan 2022 18:28:33 -0500
Subject: ceph: move CEPH_SUPER_MAGIC definition to magic.h

The uapi headers are missing the ceph definition. Move it there so
userland apps can ID cephfs.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/uapi/linux/magic.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 35687dcb1a42..53a3c20394cf 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -6,6 +6,7 @@
 #define AFFS_SUPER_MAGIC	0xadff
 #define AFS_SUPER_MAGIC                0x5346414F
 #define AUTOFS_SUPER_MAGIC	0x0187
+#define CEPH_SUPER_MAGIC	0x00c36400
 #define CODA_SUPER_MAGIC	0x73757245
 #define CRAMFS_MAGIC		0x28cd3d45	/* some random number */
 #define CRAMFS_MAGIC_WEND	0x453dcd28	/* magic number with the wrong endianess */
-- 
cgit v1.2.3


From 91341fa0003befd097e190ec2a4bf63ad957c49a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 13 Jan 2022 01:22:29 -0800
Subject: inet: frags: annotate races around fqdir->dead and fqdir->high_thresh

Both fields can be read/written without synchronization,
add proper accessors and documentation.

Fixes: d5dd88794a13 ("inet: fix various use-after-free in defrags units")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h | 11 +++++++++--
 include/net/ipv6_frag.h |  3 ++-
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 48cc5795ceda..63540be0fc34 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -117,8 +117,15 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net);
 
 static inline void fqdir_pre_exit(struct fqdir *fqdir)
 {
-	fqdir->high_thresh = 0; /* prevent creation of new frags */
-	fqdir->dead = true;
+	/* Prevent creation of new frags.
+	 * Pairs with READ_ONCE() in inet_frag_find().
+	 */
+	WRITE_ONCE(fqdir->high_thresh, 0);
+
+	/* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire()
+	 * and ip6frag_expire_frag_queue().
+	 */
+	WRITE_ONCE(fqdir->dead, true);
 }
 void fqdir_exit(struct fqdir *fqdir);
 
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
index 851029ecff13..0a4779175a52 100644
--- a/include/net/ipv6_frag.h
+++ b/include/net/ipv6_frag.h
@@ -67,7 +67,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
 	struct sk_buff *head;
 
 	rcu_read_lock();
-	if (fq->q.fqdir->dead)
+	/* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */
+	if (READ_ONCE(fq->q.fqdir->dead))
 		goto out_rcu_unlock;
 	spin_lock(&fq->q.lock);
 
-- 
cgit v1.2.3


From fb80445c438c78b40b547d12b8d56596ce4ccfeb Mon Sep 17 00:00:00 2001
From: Kevin Bracey <kevin@bracey.fi>
Date: Wed, 12 Jan 2022 19:02:10 +0200
Subject: net_sched: restore "mpu xxx" handling

commit 56b765b79e9a ("htb: improved accuracy at high rates") broke
"overhead X", "linklayer atm" and "mpu X" attributes.

"overhead X" and "linklayer atm" have already been fixed. This restores
the "mpu X" handling, as might be used by DOCSIS or Ethernet shaping:

    tc class add ... htb rate X overhead 4 mpu 64

The code being fixed is used by htb, tbf and act_police. Cake has its
own mpu handling. qdisc_calculate_pkt_len still uses the size table
containing values adjusted for mpu by user space.

iproute2 tc has always passed mpu into the kernel via a tc_ratespec
structure, but the kernel never directly acted on it, merely stored it
so that it could be read back by `tc class show`.

Rather, tc would generate length-to-time tables that included the mpu
(and linklayer) in their construction, and the kernel used those tables.

Since v3.7, the tables were no longer used. Along with "mpu", this also
broke "overhead" and "linklayer" which were fixed in 01cb71d2d47b
("net_sched: restore "overhead xxx" handling", v3.10) and 8a8e3d84b171
("net_sched: restore "linklayer atm" handling", v3.11).

"overhead" was fixed by simply restoring use of tc_ratespec::overhead -
this had originally been used by the kernel but was initially omitted
from the new non-table-based calculations.

"linklayer" had been handled in the table like "mpu", but the mode was
not originally passed in tc_ratespec. The new implementation was made to
handle it by getting new versions of tc to pass the mode in an extended
tc_ratespec, and for older versions of tc the table contents were analysed
at load time to deduce linklayer.

As "mpu" has always been given to the kernel in tc_ratespec,
accompanying the mpu-based table, we can restore system functionality
with no userspace change by making the kernel act on the tc_ratespec
value.

Fixes: 56b765b79e9a ("htb: improved accuracy at high rates")
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: Vimalkumar <j.vimal@gmail.com>
Link: https://lore.kernel.org/r/20220112170210.1014351-1-kevin@bracey.fi
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sch_generic.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c11dbac5abb2..472843eedbae 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1244,6 +1244,7 @@ struct psched_ratecfg {
 	u64	rate_bytes_ps; /* bytes per second */
 	u32	mult;
 	u16	overhead;
+	u16	mpu;
 	u8	linklayer;
 	u8	shift;
 };
@@ -1253,6 +1254,9 @@ static inline u64 psched_l2t_ns(const struct psched_ratecfg *r,
 {
 	len += r->overhead;
 
+	if (len < r->mpu)
+		len = r->mpu;
+
 	if (unlikely(r->linklayer == TC_LINKLAYER_ATM))
 		return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift;
 
@@ -1275,6 +1279,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
 	res->rate = min_t(u64, r->rate_bytes_ps, ~0U);
 
 	res->overhead = r->overhead;
+	res->mpu = r->mpu;
 	res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
 }
 
-- 
cgit v1.2.3


From 180dccb0dba4f5e84a4a70c1be1d34cbb6528b32 Mon Sep 17 00:00:00 2001
From: Laibin Qiu <qiulaibin@huawei.com>
Date: Thu, 13 Jan 2022 10:55:36 +0800
Subject: blk-mq: fix tag_get wait task can't be awakened

In case of shared tags, there might be more than one hctx which
allocates from the same tags, and each hctx is limited to allocate at
most:
        hctx_max_depth = max((bt->sb.depth + users - 1) / users, 4U);

tag idle detection is lazy, and may be delayed for 30sec, so there
could be just one real active hctx(queue) but all others are actually
idle and still accounted as active because of the lazy idle detection.
Then if wake_batch is > hctx_max_depth, driver tag allocation may wait
forever on this real active hctx.

Fix this by recalculating wake_batch when inc or dec active_queues.

Fixes: 0d2602ca30e41 ("blk-mq: improve support for shared tags maps")
Suggested-by: Ming Lei <ming.lei@redhat.com>
Suggested-by: John Garry <john.garry@huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin@huawei.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20220113025536.1479653-1-qiulaibin@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/sbitmap.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index fc0357a6e19b..95df357ec009 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -415,6 +415,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
 	sbitmap_free(&sbq->sb);
 }
 
+/**
+ * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch
+ * @sbq: Bitmap queue to recalculate wake batch.
+ * @users: Number of shares.
+ *
+ * Like sbitmap_queue_update_wake_batch(), this will calculate wake batch
+ * by depth. This interface is for HCTX shared tags or queue shared tags.
+ */
+void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
+					    unsigned int users);
+
 /**
  * sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
  * @sbq: Bitmap queue to resize.
-- 
cgit v1.2.3


From 289e7b0f7eb47b87a0441e6c81336316f301eb39 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 13 Dec 2021 11:08:53 +0100
Subject: tracing: Account bottom half disabled sections.

Disabling only bottom halves via local_bh_disable() disables also
preemption but this remains invisible to tracing. On a CONFIG_PREEMPT
kernel one might wonder why there is no scheduling happening despite the
N flag in the trace. The reason might be the a rcu_read_lock_bh()
section.

Add a 'b' to the tracing output if in task context with disabled bottom
halves.

Link: https://lkml.kernel.org/r/YbcbtdtC/bjCKo57@linutronix.de

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 3900404aa063..70c069aef02c 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -172,6 +172,7 @@ enum trace_flag_type {
 	TRACE_FLAG_SOFTIRQ		= 0x10,
 	TRACE_FLAG_PREEMPT_RESCHED	= 0x20,
 	TRACE_FLAG_NMI			= 0x40,
+	TRACE_FLAG_BH_OFF		= 0x80,
 };
 
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
-- 
cgit v1.2.3


From 6840f9094f2bd788a316d8cb0a4e42538d3e47dd Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 13 Jan 2022 16:44:19 -0500
Subject: pagevec: Initialise folio_batch->percpu_pvec_drained

When UBSAN is enabled, it reports an invalid value in __pagevec_release()
when accessing pvec->percpu_pvec_drained, which is simply whatever
garbage was on the stack.  Initialise it when initialising the rest of
the folio_batch.

Fixes: 10331795fb79 ("pagevec: Add folio_batch")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/pagevec.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index dda8d5868c81..67b1246f136b 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -111,6 +111,7 @@ static_assert(offsetof(struct pagevec, pages) ==
 static inline void folio_batch_init(struct folio_batch *fbatch)
 {
 	fbatch->nr = 0;
+	fbatch->percpu_pvec_drained = false;
 }
 
 static inline unsigned int folio_batch_count(struct folio_batch *fbatch)
-- 
cgit v1.2.3


From e5b48ee30aec1fe6dff05e36b22e886c665b4736 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Date: Tue, 4 Jan 2022 16:14:46 +0900
Subject: ata: sata_fsl: fix scsi host initialization

When compiling with W=1, the sata_fsl driver compilation throws the
warning:

drivers/ata/sata_fsl.c:1385:22: error: initialized field overwritten
[-Werror=override-init]
 1385 |         .can_queue = SATA_FSL_QUEUE_DEPTH,

This is due to the driver scsi host template initialization overwriting
the can_queue field that is already set using the ATA_NCQ_SHT()
initializer macro, resulting in the same field being initialized twice
in the host template declaration.

To remove this warning, introduce the ATA_SUBBASE_SHT_QD() and
ATA_NCQ_SHT_QD() initialization macros to allow specifying a queue depth
different from the default ATA_DEF_QUEUE using an additional argument to
the macro.

Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
 include/linux/libata.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index c258f69106f4..2e5e7c40c991 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1385,6 +1385,12 @@ extern const struct attribute_group *ata_common_sdev_groups[];
 	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,		\
 	.slave_configure	= ata_scsi_slave_config
 
+#define ATA_SUBBASE_SHT_QD(drv_name, drv_qd)			\
+	__ATA_BASE_SHT(drv_name),				\
+	.can_queue		= drv_qd,			\
+	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,		\
+	.slave_configure	= ata_scsi_slave_config
+
 #define ATA_BASE_SHT(drv_name)					\
 	ATA_SUBBASE_SHT(drv_name),				\
 	.sdev_groups		= ata_common_sdev_groups
@@ -1396,6 +1402,11 @@ extern const struct attribute_group *ata_ncq_sdev_groups[];
 	ATA_SUBBASE_SHT(drv_name),				\
 	.sdev_groups		= ata_ncq_sdev_groups,		\
 	.change_queue_depth	= ata_scsi_change_queue_depth
+
+#define ATA_NCQ_SHT_QD(drv_name, drv_qd)			\
+	ATA_SUBBASE_SHT_QD(drv_name, drv_qd),			\
+	.sdev_groups		= ata_ncq_sdev_groups,		\
+	.change_queue_depth	= ata_scsi_change_queue_depth
 #endif
 
 /*
-- 
cgit v1.2.3


From 0561e514c944da874ccdfbe2922f71b4c333c7e1 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Date: Tue, 4 Jan 2022 17:54:18 +0900
Subject: ata: fix read_id() ata port operation interface

Drivers that need to tweak a device IDENTIFY data implement the
read_id() port operation. The IDENTIFY data buffer is passed as an
argument to the read_id() operation for drivers to use. However, when
this operation is called, the IDENTIFY data is not yet converted to CPU
endian and contains le16 words.

Change the interface of the read_id operation to pass a __le16 * pointer
to the IDENTIFY data buffer to clarify the buffer endianness. Fix the
pata_netcell, pata_it821x, ahci_xgene, ahci_ceva and ahci_brcm drivers
implementation of this operation and modify the code to corretly deal
with identify data words manipulation to avoid sparse warnings such as:

drivers/ata/ahci_xgene.c:262:33: warning: invalid assignment: &=
drivers/ata/ahci_xgene.c:262:33:    left side has type unsigned short
drivers/ata/ahci_xgene.c:262:33:    right side has type restricted __le16

Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
 include/linux/libata.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 2e5e7c40c991..bf706cd45674 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -884,7 +884,8 @@ struct ata_port_operations {
 	void (*set_piomode)(struct ata_port *ap, struct ata_device *dev);
 	void (*set_dmamode)(struct ata_port *ap, struct ata_device *dev);
 	int  (*set_mode)(struct ata_link *link, struct ata_device **r_failed_dev);
-	unsigned int (*read_id)(struct ata_device *dev, struct ata_taskfile *tf, u16 *id);
+	unsigned int (*read_id)(struct ata_device *dev, struct ata_taskfile *tf,
+				__le16 *id);
 
 	void (*dev_config)(struct ata_device *dev);
 
@@ -1119,7 +1120,7 @@ extern void ata_id_string(const u16 *id, unsigned char *s,
 extern void ata_id_c_string(const u16 *id, unsigned char *s,
 			    unsigned int ofs, unsigned int len);
 extern unsigned int ata_do_dev_read_id(struct ata_device *dev,
-					struct ata_taskfile *tf, u16 *id);
+				       struct ata_taskfile *tf, __le16 *id);
 extern void ata_qc_complete(struct ata_queued_cmd *qc);
 extern u64 ata_qc_get_active(struct ata_port *ap);
 extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd);
-- 
cgit v1.2.3


From b9ba367c513dbc165dd6c01266a59db4be2a3564 Mon Sep 17 00:00:00 2001
From: Paul Menzel <pmenzel@molgen.mpg.de>
Date: Wed, 5 Jan 2022 16:36:16 +0100
Subject: ata: libata: Rename link flag ATA_LFLAG_NO_DB_DELAY

Rename the link flag ATA_LFLAG_NO_DB_DELAY to
ATA_LFLAG_NO_DEBOUNCE_DELAY. The new name is longer, but clearer.

Signed-off-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 include/linux/libata.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index bf706cd45674..605756f645be 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -143,7 +143,7 @@ enum {
 	ATA_LFLAG_NO_LPM	= (1 << 8), /* disable LPM on this link */
 	ATA_LFLAG_RST_ONCE	= (1 << 9), /* limit recovery to one reset */
 	ATA_LFLAG_CHANGED	= (1 << 10), /* LPM state changed on this link */
-	ATA_LFLAG_NO_DB_DELAY	= (1 << 11), /* no debounce delay on link resume */
+	ATA_LFLAG_NO_DEBOUNCE_DELAY = (1 << 11), /* no debounce delay on link resume */
 
 	/* struct ata_port flags */
 	ATA_FLAG_SLAVE_POSS	= (1 << 0), /* host supports slave dev */
-- 
cgit v1.2.3


From be50b2065dfa3d88428fdfdc340d154d96bf6848 Mon Sep 17 00:00:00 2001
From: Guang Zeng <guang.zeng@intel.com>
Date: Wed, 5 Jan 2022 04:35:29 -0800
Subject: kvm: x86: Add support for getting/setting expanded xstate buffer

With KVM_CAP_XSAVE, userspace uses a hardcoded 4KB buffer to get/set
xstate data from/to KVM. This doesn't work when dynamic xfeatures
(e.g. AMX) are exposed to the guest as they require a larger buffer
size.

Introduce a new capability (KVM_CAP_XSAVE2). Userspace VMM gets the
required xstate buffer size via KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2).
KVM_SET_XSAVE is extended to work with both legacy and new capabilities
by doing properly-sized memdup_user() based on the guest fpu container.
KVM_GET_XSAVE is kept for backward-compatible reason. Instead,
KVM_GET_XSAVE2 is introduced under KVM_CAP_XSAVE2 as the preferred
interface for getting xstate buffer (4KB or larger size) from KVM
(Link: https://lkml.org/lkml/2021/12/15/510)

Also, update the api doc with the new KVM_GET_XSAVE2 ioctl.

Signed-off-by: Guang Zeng <guang.zeng@intel.com>
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Message-Id: <20220105123532.12586-19-yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index fbfd70d965c6..9563d294f181 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1132,6 +1132,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_ARM_MTE 205
 #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
 #define KVM_CAP_VM_GPA_BITS 207
+#define KVM_CAP_XSAVE2 208
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1622,6 +1623,9 @@ struct kvm_enc_region {
 #define KVM_S390_NORMAL_RESET	_IO(KVMIO,   0xc3)
 #define KVM_S390_CLEAR_RESET	_IO(KVMIO,   0xc4)
 
+/* Available with KVM_CAP_XSAVE2 */
+#define KVM_GET_XSAVE2		  _IOR(KVMIO,  0xcf, struct kvm_xsave)
+
 struct kvm_s390_pv_sec_parm {
 	__u64 origin;
 	__u64 length;
-- 
cgit v1.2.3


From d9679d0013a66849f23057978f92e76b255c50aa Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 13 Oct 2021 06:55:44 -0400
Subject: virtio: wrap config->reset calls

This will enable cleanups down the road.
The idea is to disable cbs, then add "flush_queued_cbs" callback
as a parameter, this way drivers can flush any work
queued after callbacks have been disabled.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Link: https://lore.kernel.org/r/20211013105226.20225-1-mst@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 41edbc01ffa4..72292a62cd90 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -138,6 +138,7 @@ int virtio_finalize_features(struct virtio_device *dev);
 int virtio_device_freeze(struct virtio_device *dev);
 int virtio_device_restore(struct virtio_device *dev);
 #endif
+void virtio_reset_device(struct virtio_device *dev);
 
 size_t virtio_max_dma_size(struct virtio_device *vdev);
 
-- 
cgit v1.2.3


From 539fec78edb4e084e7c532affc56cc42d4ceea4b Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Fri, 26 Nov 2021 17:47:53 +0100
Subject: vdpa: add driver_override support

`driver_override` allows to control which of the vDPA bus drivers
binds to a vDPA device.

If `driver_override` is not set, the previous behaviour is followed:
devices use the first vDPA bus driver loaded (unless auto binding
is disabled).

Tested on Fedora 34 with driverctl(8):
  $ modprobe virtio-vdpa
  $ modprobe vhost-vdpa
  $ modprobe vdpa-sim-net

  $ vdpa dev add mgmtdev vdpasim_net name dev1

  # dev1 is attached to the first vDPA bus driver loaded
  $ driverctl -b vdpa list-devices
    dev1 virtio_vdpa

  $ driverctl -b vdpa set-override dev1 vhost_vdpa

  $ driverctl -b vdpa list-devices
    dev1 vhost_vdpa [*]

  Note: driverctl(8) integrates with udev so the binding is
  preserved.

Suggested-by: Jason Wang <jasowang@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lore.kernel.org/r/20211126164753.181829-3-sgarzare@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/vdpa.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index c3011ccda430..ae34015b37b7 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -64,6 +64,7 @@ struct vdpa_mgmt_dev;
  * struct vdpa_device - representation of a vDPA device
  * @dev: underlying device
  * @dma_dev: the actual device that is performing DMA
+ * @driver_override: driver name to force a match
  * @config: the configuration ops for this device.
  * @cf_mutex: Protects get and set access to configuration layout.
  * @index: device index
@@ -76,6 +77,7 @@ struct vdpa_mgmt_dev;
 struct vdpa_device {
 	struct device dev;
 	struct device *dma_dev;
+	const char *driver_override;
 	const struct vdpa_config_ops *config;
 	struct mutex cf_mutex; /* Protects get/set config */
 	unsigned int index;
-- 
cgit v1.2.3


From 28cc408be72cebb0f3fcc37bc74ab3196d4de726 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= <eperezma@redhat.com>
Date: Thu, 4 Nov 2021 20:52:48 +0100
Subject: vdpa: Mark vdpa_config_ops.get_vq_notification as optional
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since vhost_vdpa_mmap checks for its existence before calling it.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Link: https://lore.kernel.org/r/20211104195248.2088904-1-eperezma@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
---
 include/linux/vdpa.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index ae34015b37b7..2b7db96bb7d3 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -157,7 +157,7 @@ struct vdpa_map_file {
  *				@vdev: vdpa device
  *				@idx: virtqueue index
  *				@state: pointer to returned state (last_avail_idx)
- * @get_vq_notification:	Get the notification area for a virtqueue
+ * @get_vq_notification:	Get the notification area for a virtqueue (optional)
  *				@vdev: vdpa device
  *				@idx: virtqueue index
  *				Returns the notifcation area
-- 
cgit v1.2.3


From a64917bc2e9b1e0aa716b783c4ec879fdd280300 Mon Sep 17 00:00:00 2001
From: Eli Cohen <elic@nvidia.com>
Date: Wed, 5 Jan 2022 13:46:33 +0200
Subject: vdpa: Provide interface to read driver features

Provide an interface to read the negotiated features. This is needed
when building the netlink message in vdpa_dev_net_config_fill().

Also fix the implementation of vdpa_dev_net_config_fill() to use the
negotiated features instead of the device features.

To make APIs clearer, make the following name changes to struct
vdpa_config_ops so they better describe their operations:

get_features -> get_device_features
set_features -> set_driver_features

Finally, add get_driver_features to return the negotiated features and
add implementation to all the upstream drivers.

Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Eli Cohen <elic@nvidia.com>
Link: https://lore.kernel.org/r/20220105114646.577224-2-elic@nvidia.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/vdpa.h | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 2b7db96bb7d3..9cc4291a79b3 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -171,14 +171,17 @@ struct vdpa_map_file {
  *				for the device
  *				@vdev: vdpa device
  *				Returns virtqueue algin requirement
- * @get_features:		Get virtio features supported by the device
+ * @get_device_features:	Get virtio features supported by the device
  *				@vdev: vdpa device
  *				Returns the virtio features support by the
  *				device
- * @set_features:		Set virtio features supported by the driver
+ * @set_driver_features:	Set virtio features supported by the driver
  *				@vdev: vdpa device
  *				@features: feature support by the driver
  *				Returns integer: success (0) or error (< 0)
+ * @get_driver_features:	Get the virtio driver features in action
+ *				@vdev: vdpa device
+ *				Returns the virtio features accepted
  * @set_config_cb:		Set the config interrupt callback
  *				@vdev: vdpa device
  *				@cb: virtio-vdev interrupt callback structure
@@ -278,8 +281,9 @@ struct vdpa_config_ops {
 
 	/* Device ops */
 	u32 (*get_vq_align)(struct vdpa_device *vdev);
-	u64 (*get_features)(struct vdpa_device *vdev);
-	int (*set_features)(struct vdpa_device *vdev, u64 features);
+	u64 (*get_device_features)(struct vdpa_device *vdev);
+	int (*set_driver_features)(struct vdpa_device *vdev, u64 features);
+	u64 (*get_driver_features)(struct vdpa_device *vdev);
 	void (*set_config_cb)(struct vdpa_device *vdev,
 			      struct vdpa_callback *cb);
 	u16 (*get_vq_num_max)(struct vdpa_device *vdev);
@@ -397,7 +401,7 @@ static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)
 	const struct vdpa_config_ops *ops = vdev->config;
 
 	vdev->features_valid = true;
-	return ops->set_features(vdev, features);
+	return ops->set_driver_features(vdev, features);
 }
 
 void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
-- 
cgit v1.2.3


From 73bc0dbb591baea322a7319c735e5f6c7dba9cfb Mon Sep 17 00:00:00 2001
From: Eli Cohen <elic@nvidia.com>
Date: Wed, 5 Jan 2022 13:46:35 +0200
Subject: vdpa: Sync calls set/get config/status with cf_mutex

Add wrappers to get/set status and protect these operations with
cf_mutex to serialize these operations with respect to get/set config
operations.

Signed-off-by: Eli Cohen <elic@nvidia.com>
Link: https://lore.kernel.org/r/20220105114646.577224-4-elic@nvidia.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/vdpa.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 9cc4291a79b3..ae047fae2603 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -408,6 +408,9 @@ void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
 		     void *buf, unsigned int len);
 void vdpa_set_config(struct vdpa_device *dev, unsigned int offset,
 		     const void *buf, unsigned int length);
+u8 vdpa_get_status(struct vdpa_device *vdev);
+void vdpa_set_status(struct vdpa_device *vdev, u8 status);
+
 /**
  * struct vdpa_mgmtdev_ops - vdpa device ops
  * @dev_add: Add a vdpa device using alloc and register
-- 
cgit v1.2.3


From aba21aff772b8622e08f07219069be793429a48f Mon Sep 17 00:00:00 2001
From: Eli Cohen <elic@nvidia.com>
Date: Wed, 5 Jan 2022 13:46:37 +0200
Subject: vdpa: Allow to configure max data virtqueues

Add netlink support to configure the max virtqueue pairs for a device.
At least one pair is required. The maximum is dictated by the device.

Example:
$ vdpa dev add name vdpa-a mgmtdev auxiliary/mlx5_core.sf.1 max_vqp 4

Signed-off-by: Eli Cohen <elic@nvidia.com>
Link: https://lore.kernel.org/r/20220105114646.577224-6-elic@nvidia.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/vdpa.h | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index ae047fae2603..6d4d7e4fe208 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -101,6 +101,7 @@ struct vdpa_dev_set_config {
 	struct {
 		u8 mac[ETH_ALEN];
 		u16 mtu;
+		u16 max_vq_pairs;
 	} net;
 	u64 mask;
 };
@@ -391,17 +392,29 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)
 static inline int vdpa_reset(struct vdpa_device *vdev)
 {
 	const struct vdpa_config_ops *ops = vdev->config;
+	int ret;
 
+	mutex_lock(&vdev->cf_mutex);
 	vdev->features_valid = false;
-	return ops->reset(vdev);
+	ret = ops->reset(vdev);
+	mutex_unlock(&vdev->cf_mutex);
+	return ret;
 }
 
-static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)
+static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features, bool locked)
 {
 	const struct vdpa_config_ops *ops = vdev->config;
+	int ret;
+
+	if (!locked)
+		mutex_lock(&vdev->cf_mutex);
 
 	vdev->features_valid = true;
-	return ops->set_driver_features(vdev, features);
+	ret = ops->set_driver_features(vdev, features);
+	if (!locked)
+		mutex_unlock(&vdev->cf_mutex);
+
+	return ret;
 }
 
 void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
-- 
cgit v1.2.3


From 612f330ec56f12c0d099286c45f82d835845f136 Mon Sep 17 00:00:00 2001
From: Eli Cohen <elic@nvidia.com>
Date: Wed, 5 Jan 2022 13:46:40 +0200
Subject: vdpa: Add support for returning device configuration information

Add netlink attribute to store the negotiated features. This can be used
by userspace to get the current state of the vdpa instance.

Examples:

$ vdpa dev config show vdpa-a
vdpa-a: mac 00:00:00:00:88:88 link up link_announce false max_vq_pairs 16 mtu 1500
  negotiated_features CSUM GUEST_CSUM MTU MAC HOST_TSO4 HOST_TSO6 STATUS \
  CTRL_VQ MQ CTRL_MAC_ADDR VERSION_1 ACCESS_PLATFORM

$ vdpa -j dev config show vdpa-a
{"config":{"vdpa-a":{"mac":"00:00:00:00:88:88","link ":"up","link_announce":false, \
 "max_vq_pairs":16,"mtu":1500,"negotiated_features":["CSUM","GUEST_CSUM","MTU","MAC", \
 "HOST_TSO4","HOST_TSO6","STATUS","CTRL_VQ","MQ","CTRL_MAC_ADDR","VERSION_1", \
 "ACCESS_PLATFORM"]}}}

$ vdpa -jp dev config show vdpa-a
{
    "config": {
        "vdpa-a": {
            "mac": "00:00:00:00:88:88",
            "link ": "up",
            "link_announce ": false,
            "max_vq_pairs": 16,
            "mtu": 1500,
            "negotiated_features": [
"CSUM","GUEST_CSUM","MTU","MAC","HOST_TSO4","HOST_TSO6","STATUS","CTRL_VQ","MQ", \
"CTRL_MAC_ADDR","VERSION_1","ACCESS_PLATFORM"
]
        }
    }
}

Signed-off-by: Eli Cohen <elic@nvidia.com>
Link: https://lore.kernel.org/r/20220105114646.577224-9-elic@nvidia.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 include/uapi/linux/vdpa.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
index a252f06f9dfd..db3738ef3beb 100644
--- a/include/uapi/linux/vdpa.h
+++ b/include/uapi/linux/vdpa.h
@@ -23,6 +23,9 @@ enum vdpa_command {
 enum vdpa_attr {
 	VDPA_ATTR_UNSPEC,
 
+	/* Pad attribute for 64b alignment */
+	VDPA_ATTR_PAD = VDPA_ATTR_UNSPEC,
+
 	/* bus name (optional) + dev name together make the parent device handle */
 	VDPA_ATTR_MGMTDEV_BUS_NAME,		/* string */
 	VDPA_ATTR_MGMTDEV_DEV_NAME,		/* string */
@@ -40,6 +43,7 @@ enum vdpa_attr {
 	VDPA_ATTR_DEV_NET_CFG_MAX_VQP,		/* u16 */
 	VDPA_ATTR_DEV_NET_CFG_MTU,		/* u16 */
 
+	VDPA_ATTR_DEV_NEGOTIATED_FEATURES,	/* u64 */
 	/* new attributes must be added above here */
 	VDPA_ATTR_MAX,
 };
-- 
cgit v1.2.3


From cd2629f6df1cab5b3df34705ae7f3bde6147fce3 Mon Sep 17 00:00:00 2001
From: Eli Cohen <elic@nvidia.com>
Date: Wed, 5 Jan 2022 13:46:42 +0200
Subject: vdpa: Support reporting max device capabilities

Add max_supported_vqs and supported_features fields to struct
vdpa_mgmt_dev. Upstream drivers need to feel these values according to
the device capabilities.

These values are reported back in a netlink message when showing management
devices.

Examples:

$ auxiliary/mlx5_core.sf.1:
  supported_classes net
  max_supported_vqs 257
  dev_features CSUM GUEST_CSUM MTU HOST_TSO4 HOST_TSO6 STATUS CTRL_VQ MQ \
               CTRL_MAC_ADDR VERSION_1 ACCESS_PLATFORM

$ vdpa -j mgmtdev show
{"mgmtdev":{"auxiliary/mlx5_core.sf.1":{"supported_classes":["net"], \
  "max_supported_vqs":257,"dev_features":["CSUM","GUEST_CSUM","MTU", \
  "HOST_TSO4","HOST_TSO6","STATUS","CTRL_VQ","MQ","CTRL_MAC_ADDR", \
  "VERSION_1","ACCESS_PLATFORM"]}}}

$ vdpa -jp mgmtdev show
{
    "mgmtdev": {
        "auxiliary/mlx5_core.sf.1": {
            "supported_classes": [ "net" ],
            "max_supported_vqs": 257,
            "dev_features": ["CSUM","GUEST_CSUM","MTU","HOST_TSO4", \
                             "HOST_TSO6","STATUS","CTRL_VQ","MQ", \
                             "CTRL_MAC_ADDR","VERSION_1","ACCESS_PLATFORM"]
        }
    }
}

Signed-off-by: Eli Cohen <elic@nvidia.com>
Link: https://lore.kernel.org/r/20220105114646.577224-11-elic@nvidia.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Si-Wei Liu<si-wei.liu@oracle.com>
---
 include/linux/vdpa.h      | 2 ++
 include/uapi/linux/vdpa.h | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 6d4d7e4fe208..a6047fd6cf12 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -460,6 +460,8 @@ struct vdpa_mgmt_dev {
 	const struct virtio_device_id *id_table;
 	u64 config_attr_mask;
 	struct list_head list;
+	u64 supported_features;
+	u32 max_supported_vqs;
 };
 
 int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
index db3738ef3beb..1061d8d2d09d 100644
--- a/include/uapi/linux/vdpa.h
+++ b/include/uapi/linux/vdpa.h
@@ -44,6 +44,8 @@ enum vdpa_attr {
 	VDPA_ATTR_DEV_NET_CFG_MTU,		/* u16 */
 
 	VDPA_ATTR_DEV_NEGOTIATED_FEATURES,	/* u64 */
+	VDPA_ATTR_DEV_MGMTDEV_MAX_VQS,		/* u32 */
+	VDPA_ATTR_DEV_SUPPORTED_FEATURES,	/* u64 */
 	/* new attributes must be added above here */
 	VDPA_ATTR_MAX,
 };
-- 
cgit v1.2.3


From f6d955d80830b6e6f6a170be68cc3628f36365dd Mon Sep 17 00:00:00 2001
From: Eli Cohen <elic@nvidia.com>
Date: Tue, 11 Jan 2022 20:33:57 +0200
Subject: vdpa: Avoid taking cf_mutex lock on get status

Avoid the wrapper holding cf_mutex since it is not protecting anything.
To avoid confusion and unnecessary overhead incurred by it, remove.

Fixes: f489f27bc0ab ("vdpa: Sync calls set/get config/status with cf_mutex")
Signed-off-by: Eli Cohen <elic@nvidia.com>
Link: https://lore.kernel.org/r/20220111183400.38418-2-elic@nvidia.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Si-Wei Liu<si-wei.liu@oracle.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 include/linux/vdpa.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index a6047fd6cf12..2de442ececae 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -421,7 +421,6 @@ void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
 		     void *buf, unsigned int len);
 void vdpa_set_config(struct vdpa_device *dev, unsigned int offset,
 		     const void *buf, unsigned int length);
-u8 vdpa_get_status(struct vdpa_device *vdev);
 void vdpa_set_status(struct vdpa_device *vdev, u8 status);
 
 /**
-- 
cgit v1.2.3


From 800977f6f32e452cba6b04ef21d2f5383ca29209 Mon Sep 17 00:00:00 2001
From: Cai Huoqing <caihuoqing@baidu.com>
Date: Fri, 14 Jan 2022 14:02:52 -0800
Subject: kthread: add the helper function kthread_run_on_cpu()

Add a new helper function kthread_run_on_cpu(), which includes
kthread_create_on_cpu/wake_up_process().

In some cases, use kthread_run_on_cpu() directly instead of
kthread_create_on_node/kthread_bind/wake_up_process() or
kthread_create_on_cpu/wake_up_process() or
kthreadd_create/kthread_bind/wake_up_process() to simplify the code.

[akpm@linux-foundation.org: export kthread_create_on_cpu to modules]

Link: https://lkml.kernel.org/r/20211022025711.3673-2-caihuoqing@baidu.com
Signed-off-by: Cai Huoqing <caihuoqing@baidu.com>
Cc: Bernard Metzler <bmt@zurich.ibm.com>
Cc: Cai Huoqing <caihuoqing@baidu.com>
Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Doug Ledford <dledford@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: "Paul E . McKenney" <paulmck@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kthread.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 346b0f269161..db47aae7c481 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -56,6 +56,31 @@ bool kthread_is_per_cpu(struct task_struct *k);
 	__k;								   \
 })
 
+/**
+ * kthread_run_on_cpu - create and wake a cpu bound thread.
+ * @threadfn: the function to run until signal_pending(current).
+ * @data: data ptr for @threadfn.
+ * @cpu: The cpu on which the thread should be bound,
+ * @namefmt: printf-style name for the thread. Format is restricted
+ *	     to "name.*%u". Code fills in cpu number.
+ *
+ * Description: Convenient wrapper for kthread_create_on_cpu()
+ * followed by wake_up_process().  Returns the kthread or
+ * ERR_PTR(-ENOMEM).
+ */
+static inline struct task_struct *
+kthread_run_on_cpu(int (*threadfn)(void *data), void *data,
+			unsigned int cpu, const char *namefmt)
+{
+	struct task_struct *p;
+
+	p = kthread_create_on_cpu(threadfn, data, cpu, namefmt);
+	if (!IS_ERR(p))
+		wake_up_process(p);
+
+	return p;
+}
+
 void free_kthread_struct(struct task_struct *k);
 void kthread_bind(struct task_struct *k, unsigned int cpu);
 void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
-- 
cgit v1.2.3


From 60115fa54ad7b913b7cb5844e6b7ffeb842d55f2 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Fri, 14 Jan 2022 14:04:11 -0800
Subject: mm: defer kmemleak object creation of module_alloc()

Yongqiang reports a kmemleak panic when module insmod/rmmod with KASAN
enabled(without KASAN_VMALLOC) on x86[1].

When the module area allocates memory, it's kmemleak_object is created
successfully, but the KASAN shadow memory of module allocation is not
ready, so when kmemleak scan the module's pointer, it will panic due to
no shadow memory with KASAN check.

  module_alloc
    __vmalloc_node_range
      kmemleak_vmalloc
				kmemleak_scan
				  update_checksum
    kasan_module_alloc
      kmemleak_ignore

Note, there is no problem if KASAN_VMALLOC enabled, the modules area
entire shadow memory is preallocated.  Thus, the bug only exits on ARCH
which supports dynamic allocation of module area per module load, for
now, only x86/arm64/s390 are involved.

Add a VM_DEFER_KMEMLEAK flags, defer vmalloc'ed object register of
kmemleak in module_alloc() to fix this issue.

[1] https://lore.kernel.org/all/6d41e2b9-4692-5ec4-b1cd-cbe29ae89739@huawei.com/

[wangkefeng.wang@huawei.com: fix build]
  Link: https://lkml.kernel.org/r/20211125080307.27225-1-wangkefeng.wang@huawei.com
[akpm@linux-foundation.org: simplify ifdefs, per Andrey]
  Link: https://lkml.kernel.org/r/CA+fCnZcnwJHUQq34VuRxpdoY6_XbJCDJ-jopksS5Eia4PijPzw@mail.gmail.com

Link: https://lkml.kernel.org/r/20211124142034.192078-1-wangkefeng.wang@huawei.com
Fixes: 793213a82de4 ("s390/kasan: dynamic shadow mem allocation for modules")
Fixes: 39d114ddc682 ("arm64: add KASAN support")
Fixes: bebf56a1b176 ("kasan: enable instrumentation of global variables")
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reported-by: Yongqiang Liu <liuyongqiang13@huawei.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kasan.h   | 4 ++--
 include/linux/vmalloc.h | 7 +++++++
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index d8783b682669..89c99e5e67de 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -474,12 +474,12 @@ static inline void kasan_populate_early_vm_area_shadow(void *start,
  * allocations with real shadow memory. With KASAN vmalloc, the special
  * case is unnecessary, as the work is handled in the generic case.
  */
-int kasan_module_alloc(void *addr, size_t size);
+int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask);
 void kasan_free_shadow(const struct vm_struct *vm);
 
 #else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */
 
-static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
+static inline int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) { return 0; }
 static inline void kasan_free_shadow(const struct vm_struct *vm) {}
 
 #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 6e022cc712e6..880227b9f044 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -28,6 +28,13 @@ struct notifier_block;		/* in notifier.h */
 #define VM_MAP_PUT_PAGES	0x00000200	/* put pages and free array in vfree */
 #define VM_NO_HUGE_VMAP		0x00000400	/* force PAGE_SIZE pte mapping */
 
+#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
+	!defined(CONFIG_KASAN_VMALLOC)
+#define VM_DEFER_KMEMLEAK	0x00000800	/* defer kmemleak object creation */
+#else
+#define VM_DEFER_KMEMLEAK	0
+#endif
+
 /*
  * VM_KASAN is used slightly differently depending on CONFIG_KASAN_VMALLOC.
  *
-- 
cgit v1.2.3


From c4386bd8ee3a921c3c799b7197dc898ade76a453 Mon Sep 17 00:00:00 2001
From: Joao Martins <joao.m.martins@oracle.com>
Date: Fri, 14 Jan 2022 14:04:22 -0800
Subject: mm/memremap: add ZONE_DEVICE support for compound pages

Add a new @vmemmap_shift property for struct dev_pagemap which specifies
that a devmap is composed of a set of compound pages of order
@vmemmap_shift, instead of base pages.  When a compound page devmap is
requested, all but the first page are initialised as tail pages instead
of order-0 pages.

For certain ZONE_DEVICE users like device-dax which have a fixed page
size, this creates an opportunity to optimize GUP and GUP-fast walkers,
treating it the same way as THP or hugetlb pages.

Additionally, commit 7118fc2906e2 ("hugetlb: address ref count racing in
prep_compound_gigantic_page") removed set_page_count() because the
setting of page ref count to zero was redundant.  devmap pages don't
come from page allocator though and only head page refcount is used for
compound pages, hence initialize tail page count to zero.

Link: https://lkml.kernel.org/r/20211202204422.26777-5-joao.m.martins@oracle.com
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memremap.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index c0e9d35889e8..61a6a0e27359 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -99,6 +99,11 @@ struct dev_pagemap_ops {
  * @done: completion for @internal_ref
  * @type: memory type: see MEMORY_* in memory_hotplug.h
  * @flags: PGMAP_* flags to specify defailed behavior
+ * @vmemmap_shift: structural definition of how the vmemmap page metadata
+ *      is populated, specifically the metadata page order.
+ *	A zero value (default) uses base pages as the vmemmap metadata
+ *	representation. A bigger value will set up compound struct pages
+ *	of the requested order value.
  * @ops: method table
  * @owner: an opaque pointer identifying the entity that manages this
  *	instance.  Used by various helpers to make sure that no
@@ -114,6 +119,7 @@ struct dev_pagemap {
 	struct completion done;
 	enum memory_type type;
 	unsigned int flags;
+	unsigned long vmemmap_shift;
 	const struct dev_pagemap_ops *ops;
 	void *owner;
 	int nr_range;
@@ -130,6 +136,11 @@ static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap)
 	return NULL;
 }
 
+static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap)
+{
+	return 1 << pgmap->vmemmap_shift;
+}
+
 #ifdef CONFIG_ZONE_DEVICE
 void *memremap_pages(struct dev_pagemap *pgmap, int nid);
 void memunmap_pages(struct dev_pagemap *pgmap);
-- 
cgit v1.2.3


From 3e9d80a891df3b1a5d77db47fa7fdf33ba71e5cb Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 14 Jan 2022 14:05:04 -0800
Subject: mm,fs: split dump_mapping() out from dump_page()

dump_mapping() is a big chunk of dump_page(), and it'd be handy to be
able to call it when we don't have a struct page.  Split it out and move
it to fs/inode.c.  Take the opportunity to simplify some of the debug
messages a little.

Link: https://lkml.kernel.org/r/20211121121056.2870061-1-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index bbf812ce89a8..5315fa68f751 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3152,6 +3152,7 @@ extern void unlock_new_inode(struct inode *);
 extern void discard_new_inode(struct inode *);
 extern unsigned int get_next_ino(void);
 extern void evict_inodes(struct super_block *sb);
+void dump_mapping(const struct address_space *);
 
 /*
  * Userspace may rely on the the inode number being non-zero. For example, glibc
-- 
cgit v1.2.3


From b6bf9abb0aa44e53ffe9c1e6e1d32568f5b25e4a Mon Sep 17 00:00:00 2001
From: Dan Schatzberg <schatzberg.dan@gmail.com>
Date: Fri, 14 Jan 2022 14:05:35 -0800
Subject: mm/memcg: add oom_group_kill memory event

Our container agent wants to know when a container exits if it was OOM
killed or not to report to the user.  We use memory.oom.group = 1 to
ensure that OOM kills within the container's cgroup kill everything.
Existing memory.events are insufficient for knowing if this triggered:

1) Our current approach reads memory.events oom_kill and reports the
   container was killed if the value is non-zero. This is erroneous in
   some cases where containers create their children cgroups with
   memory.oom.group=1 as such OOM kills will get counted against the
   container cgroup's oom_kill counter despite not actually OOM killing
   the entire container.

2) Reading memory.events.local will fail to identify OOM kills in leaf
   cgroups (that don't set memory.oom.group) within the container
   cgroup.

This patch adds a new oom_group_kill event when memory.oom.group
triggers to allow userspace to cleanly identify when an entire cgroup is
oom killed.

[schatzberg.dan@gmail.com: changes from Johannes and Chris]
  Link: https://lkml.kernel.org/r/20211213162511.2492267-1-schatzberg.dan@gmail.com

Link: https://lkml.kernel.org/r/20211203162426.3375036-1-schatzberg.dan@gmail.com
Signed-off-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Reviewed-by: Roman Gushchin <guro@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Chris Down <chris@chrisdown.name>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Zefan Li <lizefan.x@bytedance.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Alex Shi <alexs@kernel.org>
Cc: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0c5c403f4be6..951f24f42147 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -42,6 +42,7 @@ enum memcg_memory_event {
 	MEMCG_MAX,
 	MEMCG_OOM,
 	MEMCG_OOM_KILL,
+	MEMCG_OOM_GROUP_KILL,
 	MEMCG_SWAP_HIGH,
 	MEMCG_SWAP_MAX,
 	MEMCG_SWAP_FAIL,
-- 
cgit v1.2.3


From 4e5aa1f4c2b489bc6f3ab5ca54747b18a847289d Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Fri, 14 Jan 2022 14:05:45 -0800
Subject: memcg: add per-memcg vmalloc stat

The kvmalloc* allocation functions can fallback to vmalloc allocations
and more often on long running machines.  In addition the kernel does
have __GFP_ACCOUNT kvmalloc* calls.  So, often on long running machines,
the memory.stat does not tell the complete picture which type of memory
is charged to the memcg.  So add a per-memcg vmalloc stat.

[shakeelb@google.com: page_memcg() within rcu lock, per Muchun]
  Link: https://lkml.kernel.org/r/20211222052457.1960701-1-shakeelb@google.com
[akpm@linux-foundation.org: remove cast, per Muchun]
[shakeelb@google.com: remove area->page[0] checks and move to page by page accounting per Michal]
  Link: https://lkml.kernel.org/r/20220104222341.3972772-1-shakeelb@google.com

Link: https://lkml.kernel.org/r/20211221215336.1922823-1-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 951f24f42147..0131e5574c88 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -33,6 +33,7 @@ enum memcg_stat_item {
 	MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
 	MEMCG_SOCK,
 	MEMCG_PERCPU_B,
+	MEMCG_VMALLOC,
 	MEMCG_NR_STAT,
 };
 
@@ -992,6 +993,21 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
 	local_irq_restore(flags);
 }
 
+static inline void mod_memcg_page_state(struct page *page,
+					int idx, int val)
+{
+	struct mem_cgroup *memcg;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	rcu_read_lock();
+	memcg = page_memcg(page);
+	if (memcg)
+		mod_memcg_state(memcg, idx, val);
+	rcu_read_unlock();
+}
+
 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
 {
 	return READ_ONCE(memcg->vmstats.state[idx]);
@@ -1447,6 +1463,11 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
 {
 }
 
+static inline void mod_memcg_page_state(struct page *page,
+					int idx, int val)
+{
+}
+
 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
 {
 	return 0;
-- 
cgit v1.2.3


From 9a10064f5625d5572c3626c1516e0bebc6c9fe9b Mon Sep 17 00:00:00 2001
From: Colin Cross <ccross@google.com>
Date: Fri, 14 Jan 2022 14:05:59 -0800
Subject: mm: add a field to store names for private anonymous memory

In many userspace applications, and especially in VM based applications
like Android uses heavily, there are multiple different allocators in
use.  At a minimum there is libc malloc and the stack, and in many cases
there are libc malloc, the stack, direct syscalls to mmap anonymous
memory, and multiple VM heaps (one for small objects, one for big
objects, etc.).  Each of these layers usually has its own tools to
inspect its usage; malloc by compiling a debug version, the VM through
heap inspection tools, and for direct syscalls there is usually no way
to track them.

On Android we heavily use a set of tools that use an extended version of
the logic covered in Documentation/vm/pagemap.txt to walk all pages
mapped in userspace and slice their usage by process, shared (COW) vs.
unique mappings, backing, etc.  This can account for real physical
memory usage even in cases like fork without exec (which Android uses
heavily to share as many private COW pages as possible between
processes), Kernel SamePage Merging, and clean zero pages.  It produces
a measurement of the pages that only exist in that process (USS, for
unique), and a measurement of the physical memory usage of that process
with the cost of shared pages being evenly split between processes that
share them (PSS).

If all anonymous memory is indistinguishable then figuring out the real
physical memory usage (PSS) of each heap requires either a pagemap
walking tool that can understand the heap debugging of every layer, or
for every layer's heap debugging tools to implement the pagemap walking
logic, in which case it is hard to get a consistent view of memory
across the whole system.

Tracking the information in userspace leads to all sorts of problems.
It either needs to be stored inside the process, which means every
process has to have an API to export its current heap information upon
request, or it has to be stored externally in a filesystem that somebody
needs to clean up on crashes.  It needs to be readable while the process
is still running, so it has to have some sort of synchronization with
every layer of userspace.  Efficiently tracking the ranges requires
reimplementing something like the kernel vma trees, and linking to it
from every layer of userspace.  It requires more memory, more syscalls,
more runtime cost, and more complexity to separately track regions that
the kernel is already tracking.

This patch adds a field to /proc/pid/maps and /proc/pid/smaps to show a
userspace-provided name for anonymous vmas.  The names of named
anonymous vmas are shown in /proc/pid/maps and /proc/pid/smaps as
[anon:<name>].

Userspace can set the name for a region of memory by calling

   prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, start, len, (unsigned long)name)

Setting the name to NULL clears it.  The name length limit is 80 bytes
including NUL-terminator and is checked to contain only printable ascii
characters (including space), except '[',']','\','$' and '`'.

Ascii strings are being used to have a descriptive identifiers for vmas,
which can be understood by the users reading /proc/pid/maps or
/proc/pid/smaps.  Names can be standardized for a given system and they
can include some variable parts such as the name of the allocator or a
library, tid of the thread using it, etc.

The name is stored in a pointer in the shared union in vm_area_struct
that points to a null terminated string.  Anonymous vmas with the same
name (equivalent strings) and are otherwise mergeable will be merged.
The name pointers are not shared between vmas even if they contain the
same name.  The name pointer is stored in a union with fields that are
only used on file-backed mappings, so it does not increase memory usage.

CONFIG_ANON_VMA_NAME kernel configuration is introduced to enable this
feature.  It keeps the feature disabled by default to prevent any
additional memory overhead and to avoid confusing procfs parsers on
systems which are not ready to support named anonymous vmas.

The patch is based on the original patch developed by Colin Cross, more
specifically on its latest version [1] posted upstream by Sumit Semwal.
It used a userspace pointer to store vma names.  In that design, name
pointers could be shared between vmas.  However during the last
upstreaming attempt, Kees Cook raised concerns [2] about this approach
and suggested to copy the name into kernel memory space, perform
validity checks [3] and store as a string referenced from
vm_area_struct.

One big concern is about fork() performance which would need to strdup
anonymous vma names.  Dave Hansen suggested experimenting with
worst-case scenario of forking a process with 64k vmas having longest
possible names [4].  I ran this experiment on an ARM64 Android device
and recorded a worst-case regression of almost 40% when forking such a
process.

This regression is addressed in the followup patch which replaces the
pointer to a name with a refcounted structure that allows sharing the
name pointer between vmas of the same name.  Instead of duplicating the
string during fork() or when splitting a vma it increments the refcount.

[1] https://lore.kernel.org/linux-mm/20200901161459.11772-4-sumit.semwal@linaro.org/
[2] https://lore.kernel.org/linux-mm/202009031031.D32EF57ED@keescook/
[3] https://lore.kernel.org/linux-mm/202009031022.3834F692@keescook/
[4] https://lore.kernel.org/linux-mm/5d0358ab-8c47-2f5f-8e43-23b89d6a8e95@intel.com/

Changes for prctl(2) manual page (in the options section):

PR_SET_VMA
	Sets an attribute specified in arg2 for virtual memory areas
	starting from the address specified in arg3 and spanning the
	size specified	in arg4. arg5 specifies the value of the attribute
	to be set. Note that assigning an attribute to a virtual memory
	area might prevent it from being merged with adjacent virtual
	memory areas due to the difference in that attribute's value.

	Currently, arg2 must be one of:

	PR_SET_VMA_ANON_NAME
		Set a name for anonymous virtual memory areas. arg5 should
		be a pointer to a null-terminated string containing the
		name. The name length including null byte cannot exceed
		80 bytes. If arg5 is NULL, the name of the appropriate
		anonymous virtual memory areas will be reset. The name
		can contain only printable ascii characters (including
                space), except '[',']','\','$' and '`'.

                This feature is available only if the kernel is built with
                the CONFIG_ANON_VMA_NAME option enabled.

[surenb@google.com: docs: proc.rst: /proc/PID/maps: fix malformed table]
  Link: https://lkml.kernel.org/r/20211123185928.2513763-1-surenb@google.com
[surenb: rebased over v5.15-rc6, replaced userpointer with a kernel copy,
 added input sanitization and CONFIG_ANON_VMA_NAME config. The bulk of the
 work here was done by Colin Cross, therefore, with his permission, keeping
 him as the author]

Link: https://lkml.kernel.org/r/20211019215511.3771969-2-surenb@google.com
Signed-off-by: Colin Cross <ccross@google.com>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jan Glauber <jan.glauber@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rob Landley <rob@landley.net>
Cc: "Serge E. Hallyn" <serge.hallyn@ubuntu.com>
Cc: Shaohua Li <shli@fusionio.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h         | 13 +++++++++-
 include/linux/mm_types.h   | 64 +++++++++++++++++++++++++++++++++++++++++++---
 include/uapi/linux/prctl.h |  3 +++
 3 files changed, 75 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a7e4a9e7d807..7000442984b9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2658,7 +2658,7 @@ static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 extern struct vm_area_struct *vma_merge(struct mm_struct *,
 	struct vm_area_struct *prev, unsigned long addr, unsigned long end,
 	unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
-	struct mempolicy *, struct vm_userfaultfd_ctx);
+	struct mempolicy *, struct vm_userfaultfd_ctx, const char *);
 extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
 extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
 	unsigned long addr, int new_below);
@@ -3391,5 +3391,16 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
 	return 0;
 }
 
+#ifdef CONFIG_ANON_VMA_NAME
+int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
+			  unsigned long len_in, const char *name);
+#else
+static inline int
+madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
+		      unsigned long len_in, const char *name) {
+	return 0;
+}
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c3a6e6209600..799e2ee626b2 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -426,11 +426,19 @@ struct vm_area_struct {
 	/*
 	 * For areas with an address space and backing store,
 	 * linkage into the address_space->i_mmap interval tree.
+	 *
+	 * For private anonymous mappings, a pointer to a null terminated string
+	 * containing the name given to the vma, or NULL if unnamed.
 	 */
-	struct {
-		struct rb_node rb;
-		unsigned long rb_subtree_last;
-	} shared;
+
+	union {
+		struct {
+			struct rb_node rb;
+			unsigned long rb_subtree_last;
+		} shared;
+		/* Serialized by mmap_sem. */
+		char *anon_name;
+	};
 
 	/*
 	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
@@ -875,4 +883,52 @@ typedef struct {
 	unsigned long val;
 } swp_entry_t;
 
+#ifdef CONFIG_ANON_VMA_NAME
+/*
+ * mmap_lock should be read-locked when calling vma_anon_name() and while using
+ * the returned pointer.
+ */
+extern const char *vma_anon_name(struct vm_area_struct *vma);
+
+/*
+ * mmap_lock should be read-locked for orig_vma->vm_mm.
+ * mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be
+ * isolated.
+ */
+extern void dup_vma_anon_name(struct vm_area_struct *orig_vma,
+			      struct vm_area_struct *new_vma);
+
+/*
+ * mmap_lock should be write-locked or vma should have been isolated under
+ * write-locked mmap_lock protection.
+ */
+extern void free_vma_anon_name(struct vm_area_struct *vma);
+
+/* mmap_lock should be read-locked */
+static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
+					 const char *name)
+{
+	const char *vma_name = vma_anon_name(vma);
+
+	/* either both NULL, or pointers to same string */
+	if (vma_name == name)
+		return true;
+
+	return name && vma_name && !strcmp(name, vma_name);
+}
+#else /* CONFIG_ANON_VMA_NAME */
+static inline const char *vma_anon_name(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma,
+			      struct vm_area_struct *new_vma) {}
+static inline void free_vma_anon_name(struct vm_area_struct *vma) {}
+static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
+					 const char *name)
+{
+	return true;
+}
+#endif  /* CONFIG_ANON_VMA_NAME */
+
 #endif /* _LINUX_MM_TYPES_H */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index bb73e9a0b24f..e998764f0262 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -272,4 +272,7 @@ struct prctl_mm_map {
 # define PR_SCHED_CORE_SCOPE_THREAD_GROUP	1
 # define PR_SCHED_CORE_SCOPE_PROCESS_GROUP	2
 
+#define PR_SET_VMA		0x53564d41
+# define PR_SET_VMA_ANON_NAME		0
+
 #endif /* _LINUX_PRCTL_H */
-- 
cgit v1.2.3


From 78db3412833dc9c479cd17412035f216cfd01a29 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Fri, 14 Jan 2022 14:06:03 -0800
Subject: mm: add anonymous vma name refcounting

While forking a process with high number (64K) of named anonymous vmas
the overhead caused by strdup() is noticeable.  Experiments with ARM64
Android device show up to 40% performance regression when forking a
process with 64k unpopulated anonymous vmas using the max name lengths
vs the same process with the same number of anonymous vmas having no
name.

Introduce anon_vma_name refcounted structure to avoid the overhead of
copying vma names during fork() and when splitting named anonymous vmas.

When a vma is duplicated, instead of copying the name we increment the
refcount of this structure.  Multiple vmas can point to the same
anon_vma_name as long as they increment the refcount.  The name member
of anon_vma_name structure is assigned at structure allocation time and
is never changed.  If vma name changes then the refcount of the original
structure is dropped, a new anon_vma_name structure is allocated to hold
the new name and the vma pointer is updated to point to the new
structure.

With this approach the fork() performance regressions is reduced 3-4x
times and with usecases using more reasonable number of VMAs (a few
thousand) the regressions is not measurable.

Link: https://lkml.kernel.org/r/20211019215511.3771969-3-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Colin Cross <ccross@google.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jan Glauber <jan.glauber@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rob Landley <rob@landley.net>
Cc: "Serge E. Hallyn" <serge.hallyn@ubuntu.com>
Cc: Shaohua Li <shli@fusionio.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 799e2ee626b2..449b6eafc695 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -5,6 +5,7 @@
 #include <linux/mm_types_task.h>
 
 #include <linux/auxvec.h>
+#include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/rbtree.h>
@@ -386,6 +387,12 @@ struct vm_userfaultfd_ctx {
 struct vm_userfaultfd_ctx {};
 #endif /* CONFIG_USERFAULTFD */
 
+struct anon_vma_name {
+	struct kref kref;
+	/* The name needs to be at the end because it is dynamically sized. */
+	char name[];
+};
+
 /*
  * This struct describes a virtual memory area. There is one of these
  * per VM-area/task. A VM area is any part of the process virtual memory
@@ -437,7 +444,7 @@ struct vm_area_struct {
 			unsigned long rb_subtree_last;
 		} shared;
 		/* Serialized by mmap_sem. */
-		char *anon_name;
+		struct anon_vma_name *anon_name;
 	};
 
 	/*
-- 
cgit v1.2.3


From 17fca131cee21724ee953a17c185c14e9533af5b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 Jan 2022 14:06:07 -0800
Subject: mm: move anon_vma declarations to linux/mm_inline.h

The patch to add anonymous vma names causes a build failure in some
configurations:

  include/linux/mm_types.h: In function 'is_same_vma_anon_name':
  include/linux/mm_types.h:924:37: error: implicit declaration of function 'strcmp' [-Werror=implicit-function-declaration]
    924 |         return name && vma_name && !strcmp(name, vma_name);
        |                                     ^~~~~~
  include/linux/mm_types.h:22:1: note: 'strcmp' is defined in header '<string.h>'; did you forget to '#include <string.h>'?

This should not really be part of linux/mm_types.h in the first place,
as that header is meant to only contain structure defintions and need a
minimum set of indirect includes itself.

While the header clearly includes more than it should at this point,
let's not make it worse by including string.h as well, which would pull
in the expensive (compile-speed wise) fortify-string logic.

Move the new functions into a separate header that only needs to be
included in a couple of locations.

Link: https://lkml.kernel.org/r/20211207125710.2503446-1-arnd@kernel.org
Fixes: "mm: add a field to store names for private anonymous memory"
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Colin Cross <ccross@google.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_inline.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mm_types.h  | 48 ---------------------------------------------
 2 files changed, 50 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index e2ec68b0515c..47d96d2647ca 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -4,6 +4,7 @@
 
 #include <linux/huge_mm.h>
 #include <linux/swap.h>
+#include <linux/string.h>
 
 /**
  * folio_is_file_lru - Should the folio be on a file LRU or anon LRU?
@@ -135,4 +136,53 @@ static __always_inline void del_page_from_lru_list(struct page *page,
 {
 	lruvec_del_folio(lruvec, page_folio(page));
 }
+
+#ifdef CONFIG_ANON_VMA_NAME
+/*
+ * mmap_lock should be read-locked when calling vma_anon_name() and while using
+ * the returned pointer.
+ */
+extern const char *vma_anon_name(struct vm_area_struct *vma);
+
+/*
+ * mmap_lock should be read-locked for orig_vma->vm_mm.
+ * mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be
+ * isolated.
+ */
+extern void dup_vma_anon_name(struct vm_area_struct *orig_vma,
+			      struct vm_area_struct *new_vma);
+
+/*
+ * mmap_lock should be write-locked or vma should have been isolated under
+ * write-locked mmap_lock protection.
+ */
+extern void free_vma_anon_name(struct vm_area_struct *vma);
+
+/* mmap_lock should be read-locked */
+static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
+					 const char *name)
+{
+	const char *vma_name = vma_anon_name(vma);
+
+	/* either both NULL, or pointers to same string */
+	if (vma_name == name)
+		return true;
+
+	return name && vma_name && !strcmp(name, vma_name);
+}
+#else /* CONFIG_ANON_VMA_NAME */
+static inline const char *vma_anon_name(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma,
+			      struct vm_area_struct *new_vma) {}
+static inline void free_vma_anon_name(struct vm_area_struct *vma) {}
+static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
+					 const char *name)
+{
+	return true;
+}
+#endif  /* CONFIG_ANON_VMA_NAME */
+
 #endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 449b6eafc695..4d5fb84eed5e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -890,52 +890,4 @@ typedef struct {
 	unsigned long val;
 } swp_entry_t;
 
-#ifdef CONFIG_ANON_VMA_NAME
-/*
- * mmap_lock should be read-locked when calling vma_anon_name() and while using
- * the returned pointer.
- */
-extern const char *vma_anon_name(struct vm_area_struct *vma);
-
-/*
- * mmap_lock should be read-locked for orig_vma->vm_mm.
- * mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be
- * isolated.
- */
-extern void dup_vma_anon_name(struct vm_area_struct *orig_vma,
-			      struct vm_area_struct *new_vma);
-
-/*
- * mmap_lock should be write-locked or vma should have been isolated under
- * write-locked mmap_lock protection.
- */
-extern void free_vma_anon_name(struct vm_area_struct *vma);
-
-/* mmap_lock should be read-locked */
-static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
-					 const char *name)
-{
-	const char *vma_name = vma_anon_name(vma);
-
-	/* either both NULL, or pointers to same string */
-	if (vma_name == name)
-		return true;
-
-	return name && vma_name && !strcmp(name, vma_name);
-}
-#else /* CONFIG_ANON_VMA_NAME */
-static inline const char *vma_anon_name(struct vm_area_struct *vma)
-{
-	return NULL;
-}
-static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma,
-			      struct vm_area_struct *new_vma) {}
-static inline void free_vma_anon_name(struct vm_area_struct *vma) {}
-static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
-					 const char *name)
-{
-	return true;
-}
-#endif  /* CONFIG_ANON_VMA_NAME */
-
 #endif /* _LINUX_MM_TYPES_H */
-- 
cgit v1.2.3


From 36090def7bad06a6346f86a7cfdbfda2d138cb64 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 Jan 2022 14:06:10 -0800
Subject: mm: move tlb_flush_pending inline helpers to mm_inline.h

linux/mm_types.h should only define structure definitions, to make it
cheap to include elsewhere.  The atomic_t helper function definitions
are particularly large, so it's better to move the helpers using those
into the existing linux/mm_inline.h and only include that where needed.

As a follow-up, we may want to go through all the indirect includes in
mm_types.h and reduce them as much as possible.

Link: https://lkml.kernel.org/r/20211207125710.2503446-2-arnd@kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Colin Cross <ccross@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h        |  45 ----------------
 include/linux/mm_inline.h |  86 +++++++++++++++++++++++++++++++
 include/linux/mm_types.h  | 129 ++++++++++++++++------------------------------
 3 files changed, 131 insertions(+), 129 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7000442984b9..c17e5cfc1e47 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -424,51 +424,6 @@ extern unsigned int kobjsize(const void *objp);
  */
 extern pgprot_t protection_map[16];
 
-/**
- * enum fault_flag - Fault flag definitions.
- * @FAULT_FLAG_WRITE: Fault was a write fault.
- * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE.
- * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked.
- * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying.
- * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region.
- * @FAULT_FLAG_TRIED: The fault has been tried once.
- * @FAULT_FLAG_USER: The fault originated in userspace.
- * @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
- * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
- * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
- *
- * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
- * whether we would allow page faults to retry by specifying these two
- * fault flags correctly.  Currently there can be three legal combinations:
- *
- * (a) ALLOW_RETRY and !TRIED:  this means the page fault allows retry, and
- *                              this is the first try
- *
- * (b) ALLOW_RETRY and TRIED:   this means the page fault allows retry, and
- *                              we've already tried at least once
- *
- * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry
- *
- * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never
- * be used.  Note that page faults can be allowed to retry for multiple times,
- * in which case we'll have an initial fault with flags (a) then later on
- * continuous faults with flags (b).  We should always try to detect pending
- * signals before a retry to make sure the continuous page faults can still be
- * interrupted if necessary.
- */
-enum fault_flag {
-	FAULT_FLAG_WRITE =		1 << 0,
-	FAULT_FLAG_MKWRITE =		1 << 1,
-	FAULT_FLAG_ALLOW_RETRY =	1 << 2,
-	FAULT_FLAG_RETRY_NOWAIT = 	1 << 3,
-	FAULT_FLAG_KILLABLE =		1 << 4,
-	FAULT_FLAG_TRIED = 		1 << 5,
-	FAULT_FLAG_USER =		1 << 6,
-	FAULT_FLAG_REMOTE =		1 << 7,
-	FAULT_FLAG_INSTRUCTION =	1 << 8,
-	FAULT_FLAG_INTERRUPTIBLE =	1 << 9,
-};
-
 /*
  * The default fault flags that should be used by most of the
  * arch-specific page fault handlers.
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 47d96d2647ca..b725839dfe71 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -2,6 +2,7 @@
 #ifndef LINUX_MM_INLINE_H
 #define LINUX_MM_INLINE_H
 
+#include <linux/atomic.h>
 #include <linux/huge_mm.h>
 #include <linux/swap.h>
 #include <linux/string.h>
@@ -185,4 +186,89 @@ static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
 }
 #endif  /* CONFIG_ANON_VMA_NAME */
 
+static inline void init_tlb_flush_pending(struct mm_struct *mm)
+{
+	atomic_set(&mm->tlb_flush_pending, 0);
+}
+
+static inline void inc_tlb_flush_pending(struct mm_struct *mm)
+{
+	atomic_inc(&mm->tlb_flush_pending);
+	/*
+	 * The only time this value is relevant is when there are indeed pages
+	 * to flush. And we'll only flush pages after changing them, which
+	 * requires the PTL.
+	 *
+	 * So the ordering here is:
+	 *
+	 *	atomic_inc(&mm->tlb_flush_pending);
+	 *	spin_lock(&ptl);
+	 *	...
+	 *	set_pte_at();
+	 *	spin_unlock(&ptl);
+	 *
+	 *				spin_lock(&ptl)
+	 *				mm_tlb_flush_pending();
+	 *				....
+	 *				spin_unlock(&ptl);
+	 *
+	 *	flush_tlb_range();
+	 *	atomic_dec(&mm->tlb_flush_pending);
+	 *
+	 * Where the increment if constrained by the PTL unlock, it thus
+	 * ensures that the increment is visible if the PTE modification is
+	 * visible. After all, if there is no PTE modification, nobody cares
+	 * about TLB flushes either.
+	 *
+	 * This very much relies on users (mm_tlb_flush_pending() and
+	 * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and
+	 * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc
+	 * locks (PPC) the unlock of one doesn't order against the lock of
+	 * another PTL.
+	 *
+	 * The decrement is ordered by the flush_tlb_range(), such that
+	 * mm_tlb_flush_pending() will not return false unless all flushes have
+	 * completed.
+	 */
+}
+
+static inline void dec_tlb_flush_pending(struct mm_struct *mm)
+{
+	/*
+	 * See inc_tlb_flush_pending().
+	 *
+	 * This cannot be smp_mb__before_atomic() because smp_mb() simply does
+	 * not order against TLB invalidate completion, which is what we need.
+	 *
+	 * Therefore we must rely on tlb_flush_*() to guarantee order.
+	 */
+	atomic_dec(&mm->tlb_flush_pending);
+}
+
+static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
+{
+	/*
+	 * Must be called after having acquired the PTL; orders against that
+	 * PTLs release and therefore ensures that if we observe the modified
+	 * PTE we must also observe the increment from inc_tlb_flush_pending().
+	 *
+	 * That is, it only guarantees to return true if there is a flush
+	 * pending for _this_ PTL.
+	 */
+	return atomic_read(&mm->tlb_flush_pending);
+}
+
+static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
+{
+	/*
+	 * Similar to mm_tlb_flush_pending(), we must have acquired the PTL
+	 * for which there is a TLB flush pending in order to guarantee
+	 * we've seen both that PTE modification and the increment.
+	 *
+	 * (no requirement on actually still holding the PTL, that is irrelevant)
+	 */
+	return atomic_read(&mm->tlb_flush_pending) > 1;
+}
+
+
 #endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 4d5fb84eed5e..6a89f128c990 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -692,90 +692,6 @@ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
 extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
 extern void tlb_finish_mmu(struct mmu_gather *tlb);
 
-static inline void init_tlb_flush_pending(struct mm_struct *mm)
-{
-	atomic_set(&mm->tlb_flush_pending, 0);
-}
-
-static inline void inc_tlb_flush_pending(struct mm_struct *mm)
-{
-	atomic_inc(&mm->tlb_flush_pending);
-	/*
-	 * The only time this value is relevant is when there are indeed pages
-	 * to flush. And we'll only flush pages after changing them, which
-	 * requires the PTL.
-	 *
-	 * So the ordering here is:
-	 *
-	 *	atomic_inc(&mm->tlb_flush_pending);
-	 *	spin_lock(&ptl);
-	 *	...
-	 *	set_pte_at();
-	 *	spin_unlock(&ptl);
-	 *
-	 *				spin_lock(&ptl)
-	 *				mm_tlb_flush_pending();
-	 *				....
-	 *				spin_unlock(&ptl);
-	 *
-	 *	flush_tlb_range();
-	 *	atomic_dec(&mm->tlb_flush_pending);
-	 *
-	 * Where the increment if constrained by the PTL unlock, it thus
-	 * ensures that the increment is visible if the PTE modification is
-	 * visible. After all, if there is no PTE modification, nobody cares
-	 * about TLB flushes either.
-	 *
-	 * This very much relies on users (mm_tlb_flush_pending() and
-	 * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and
-	 * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc
-	 * locks (PPC) the unlock of one doesn't order against the lock of
-	 * another PTL.
-	 *
-	 * The decrement is ordered by the flush_tlb_range(), such that
-	 * mm_tlb_flush_pending() will not return false unless all flushes have
-	 * completed.
-	 */
-}
-
-static inline void dec_tlb_flush_pending(struct mm_struct *mm)
-{
-	/*
-	 * See inc_tlb_flush_pending().
-	 *
-	 * This cannot be smp_mb__before_atomic() because smp_mb() simply does
-	 * not order against TLB invalidate completion, which is what we need.
-	 *
-	 * Therefore we must rely on tlb_flush_*() to guarantee order.
-	 */
-	atomic_dec(&mm->tlb_flush_pending);
-}
-
-static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
-{
-	/*
-	 * Must be called after having acquired the PTL; orders against that
-	 * PTLs release and therefore ensures that if we observe the modified
-	 * PTE we must also observe the increment from inc_tlb_flush_pending().
-	 *
-	 * That is, it only guarantees to return true if there is a flush
-	 * pending for _this_ PTL.
-	 */
-	return atomic_read(&mm->tlb_flush_pending);
-}
-
-static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
-{
-	/*
-	 * Similar to mm_tlb_flush_pending(), we must have acquired the PTL
-	 * for which there is a TLB flush pending in order to guarantee
-	 * we've seen both that PTE modification and the increment.
-	 *
-	 * (no requirement on actually still holding the PTL, that is irrelevant)
-	 */
-	return atomic_read(&mm->tlb_flush_pending) > 1;
-}
-
 struct vm_fault;
 
 /**
@@ -890,4 +806,49 @@ typedef struct {
 	unsigned long val;
 } swp_entry_t;
 
+/**
+ * enum fault_flag - Fault flag definitions.
+ * @FAULT_FLAG_WRITE: Fault was a write fault.
+ * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE.
+ * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked.
+ * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying.
+ * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region.
+ * @FAULT_FLAG_TRIED: The fault has been tried once.
+ * @FAULT_FLAG_USER: The fault originated in userspace.
+ * @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
+ * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
+ * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
+ *
+ * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
+ * whether we would allow page faults to retry by specifying these two
+ * fault flags correctly.  Currently there can be three legal combinations:
+ *
+ * (a) ALLOW_RETRY and !TRIED:  this means the page fault allows retry, and
+ *                              this is the first try
+ *
+ * (b) ALLOW_RETRY and TRIED:   this means the page fault allows retry, and
+ *                              we've already tried at least once
+ *
+ * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry
+ *
+ * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never
+ * be used.  Note that page faults can be allowed to retry for multiple times,
+ * in which case we'll have an initial fault with flags (a) then later on
+ * continuous faults with flags (b).  We should always try to detect pending
+ * signals before a retry to make sure the continuous page faults can still be
+ * interrupted if necessary.
+ */
+enum fault_flag {
+	FAULT_FLAG_WRITE =		1 << 0,
+	FAULT_FLAG_MKWRITE =		1 << 1,
+	FAULT_FLAG_ALLOW_RETRY =	1 << 2,
+	FAULT_FLAG_RETRY_NOWAIT = 	1 << 3,
+	FAULT_FLAG_KILLABLE =		1 << 4,
+	FAULT_FLAG_TRIED = 		1 << 5,
+	FAULT_FLAG_USER =		1 << 6,
+	FAULT_FLAG_REMOTE =		1 << 7,
+	FAULT_FLAG_INSTRUCTION =	1 << 8,
+	FAULT_FLAG_INTERRUPTIBLE =	1 << 9,
+};
+
 #endif /* _LINUX_MM_TYPES_H */
-- 
cgit v1.2.3


From cc6dcfee72509868271d42919a3c1081b6b0dc7e Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Fri, 14 Jan 2022 14:06:18 -0800
Subject: mm: document locking restrictions for vm_operations_struct::close

Add comments for vm_operations_struct::close documenting locking
requirements for this callback and its callers.

Link: https://lkml.kernel.org/r/20211209191325.3069345-2-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Christian Brauner <christian@brauner.io>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: Jan Engelhardt <jengelh@inai.de>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tim Murray <timmurray@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c17e5cfc1e47..4d7245e6802a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -532,6 +532,10 @@ enum page_entry_size {
  */
 struct vm_operations_struct {
 	void (*open)(struct vm_area_struct * area);
+	/**
+	 * @close: Called when the VMA is being removed from the MM.
+	 * Context: User context.  May sleep.  Caller holds mmap_lock.
+	 */
 	void (*close)(struct vm_area_struct * area);
 	/* Called any time before splitting to check if it's allowed */
 	int (*may_split)(struct vm_area_struct *area, unsigned long addr);
-- 
cgit v1.2.3


From 08d5b29eac7dd5e6c79b66d390ecbb9219e05931 Mon Sep 17 00:00:00 2001
From: Pasha Tatashin <pasha.tatashin@soleen.com>
Date: Fri, 14 Jan 2022 14:06:33 -0800
Subject: mm: ptep_clear() page table helper

We have ptep_get_and_clear() and ptep_get_and_clear_full() helpers to
clear PTE from user page tables, but there is no variant for simple
clear of a present PTE from user page tables without using a low level
pte_clear() which can be either native or para-virtualised.

Add a new ptep_clear() that can be used in common code to clear PTEs
from page table.  We will need this call later in order to add a hook
for page table check.

Link: https://lkml.kernel.org/r/20211221154650.1047963-3-pasha.tatashin@soleen.com
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Greg Thelen <gthelen@google.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Slaby <jirislaby@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kees Cook <keescook@chromium.org>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wei Xu <weixugc@google.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pgtable.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index e24d2c992b11..bc8713a76e03 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -258,6 +258,14 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
+#ifndef __HAVE_ARCH_PTEP_CLEAR
+static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep)
+{
+	pte_clear(mm, addr, ptep);
+}
+#endif
+
 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 				       unsigned long address,
-- 
cgit v1.2.3


From df4e817b710809425d899340dbfa8504a3ca4ba5 Mon Sep 17 00:00:00 2001
From: Pasha Tatashin <pasha.tatashin@soleen.com>
Date: Fri, 14 Jan 2022 14:06:37 -0800
Subject: mm: page table check

Check user page table entries at the time they are added and removed.

Allows to synchronously catch memory corruption issues related to double
mapping.

When a pte for an anonymous page is added into page table, we verify
that this pte does not already point to a file backed page, and vice
versa if this is a file backed page that is being added we verify that
this page does not have an anonymous mapping

We also enforce that read-only sharing for anonymous pages is allowed
(i.e.  cow after fork).  All other sharing must be for file pages.

Page table check allows to protect and debug cases where "struct page"
metadata became corrupted for some reason.  For example, when refcnt or
mapcount become invalid.

Link: https://lkml.kernel.org/r/20211221154650.1047963-4-pasha.tatashin@soleen.com
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Greg Thelen <gthelen@google.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Slaby <jirislaby@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kees Cook <keescook@chromium.org>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wei Xu <weixugc@google.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_table_check.h | 147 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 147 insertions(+)
 create mode 100644 include/linux/page_table_check.h

(limited to 'include')

diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h
new file mode 100644
index 000000000000..38cace1da7b6
--- /dev/null
+++ b/include/linux/page_table_check.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2021, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+#ifndef __LINUX_PAGE_TABLE_CHECK_H
+#define __LINUX_PAGE_TABLE_CHECK_H
+
+#ifdef CONFIG_PAGE_TABLE_CHECK
+#include <linux/jump_label.h>
+
+extern struct static_key_true page_table_check_disabled;
+extern struct page_ext_operations page_table_check_ops;
+
+void __page_table_check_zero(struct page *page, unsigned int order);
+void __page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr,
+				  pte_t pte);
+void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr,
+				  pmd_t pmd);
+void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr,
+				  pud_t pud);
+void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte);
+void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr,
+				pmd_t *pmdp, pmd_t pmd);
+void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
+				pud_t *pudp, pud_t pud);
+
+static inline void page_table_check_alloc(struct page *page, unsigned int order)
+{
+	if (static_branch_likely(&page_table_check_disabled))
+		return;
+
+	__page_table_check_zero(page, order);
+}
+
+static inline void page_table_check_free(struct page *page, unsigned int order)
+{
+	if (static_branch_likely(&page_table_check_disabled))
+		return;
+
+	__page_table_check_zero(page, order);
+}
+
+static inline void page_table_check_pte_clear(struct mm_struct *mm,
+					      unsigned long addr, pte_t pte)
+{
+	if (static_branch_likely(&page_table_check_disabled))
+		return;
+
+	__page_table_check_pte_clear(mm, addr, pte);
+}
+
+static inline void page_table_check_pmd_clear(struct mm_struct *mm,
+					      unsigned long addr, pmd_t pmd)
+{
+	if (static_branch_likely(&page_table_check_disabled))
+		return;
+
+	__page_table_check_pmd_clear(mm, addr, pmd);
+}
+
+static inline void page_table_check_pud_clear(struct mm_struct *mm,
+					      unsigned long addr, pud_t pud)
+{
+	if (static_branch_likely(&page_table_check_disabled))
+		return;
+
+	__page_table_check_pud_clear(mm, addr, pud);
+}
+
+static inline void page_table_check_pte_set(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep,
+					    pte_t pte)
+{
+	if (static_branch_likely(&page_table_check_disabled))
+		return;
+
+	__page_table_check_pte_set(mm, addr, ptep, pte);
+}
+
+static inline void page_table_check_pmd_set(struct mm_struct *mm,
+					    unsigned long addr, pmd_t *pmdp,
+					    pmd_t pmd)
+{
+	if (static_branch_likely(&page_table_check_disabled))
+		return;
+
+	__page_table_check_pmd_set(mm, addr, pmdp, pmd);
+}
+
+static inline void page_table_check_pud_set(struct mm_struct *mm,
+					    unsigned long addr, pud_t *pudp,
+					    pud_t pud)
+{
+	if (static_branch_likely(&page_table_check_disabled))
+		return;
+
+	__page_table_check_pud_set(mm, addr, pudp, pud);
+}
+
+#else
+
+static inline void page_table_check_alloc(struct page *page, unsigned int order)
+{
+}
+
+static inline void page_table_check_free(struct page *page, unsigned int order)
+{
+}
+
+static inline void page_table_check_pte_clear(struct mm_struct *mm,
+					      unsigned long addr, pte_t pte)
+{
+}
+
+static inline void page_table_check_pmd_clear(struct mm_struct *mm,
+					      unsigned long addr, pmd_t pmd)
+{
+}
+
+static inline void page_table_check_pud_clear(struct mm_struct *mm,
+					      unsigned long addr, pud_t pud)
+{
+}
+
+static inline void page_table_check_pte_set(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep,
+					    pte_t pte)
+{
+}
+
+static inline void page_table_check_pmd_set(struct mm_struct *mm,
+					    unsigned long addr, pmd_t *pmdp,
+					    pmd_t pmd)
+{
+}
+
+static inline void page_table_check_pud_set(struct mm_struct *mm,
+					    unsigned long addr, pud_t *pudp,
+					    pud_t pud)
+{
+}
+
+#endif /* CONFIG_PAGE_TABLE_CHECK */
+#endif /* __LINUX_PAGE_TABLE_CHECK_H */
-- 
cgit v1.2.3


From 020e87650af9f43683546729f959fdc78422a4b7 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 14 Jan 2022 14:06:44 -0800
Subject: mm: remove last argument of reuse_swap_page()

None of the callers care about the total_map_swapcount() any more.

Link: https://lkml.kernel.org/r/20211220205943.456187-1-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index d1ea44b31f19..bdccbf1efa61 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -514,7 +514,7 @@ extern int __swp_swapcount(swp_entry_t entry);
 extern int swp_swapcount(swp_entry_t entry);
 extern struct swap_info_struct *page_swap_info(struct page *);
 extern struct swap_info_struct *swp_swap_info(swp_entry_t entry);
-extern bool reuse_swap_page(struct page *, int *);
+extern bool reuse_swap_page(struct page *);
 extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
 extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
@@ -680,8 +680,8 @@ static inline int swp_swapcount(swp_entry_t entry)
 	return 0;
 }
 
-#define reuse_swap_page(page, total_map_swapcount) \
-	(page_trans_huge_mapcount(page, total_map_swapcount) == 1)
+#define reuse_swap_page(page) \
+	(page_trans_huge_mapcount(page, NULL) == 1)
 
 static inline int try_to_free_swap(struct page *page)
 {
-- 
cgit v1.2.3


From d08d2b62510e2407cf939e693aefd179dc114913 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 14 Jan 2022 14:06:51 -0800
Subject: mm: remove the total_mapcount argument from
 page_trans_huge_mapcount()

All callers pass NULL, so we can stop calculating the value we would
store in it.

Link: https://lkml.kernel.org/r/20211220205943.456187-3-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h   | 10 +++-------
 include/linux/swap.h |  2 +-
 2 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4d7245e6802a..cef65f9cbdf2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -799,19 +799,15 @@ static inline int page_mapcount(struct page *page)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int total_mapcount(struct page *page);
-int page_trans_huge_mapcount(struct page *page, int *total_mapcount);
+int page_trans_huge_mapcount(struct page *page);
 #else
 static inline int total_mapcount(struct page *page)
 {
 	return page_mapcount(page);
 }
-static inline int page_trans_huge_mapcount(struct page *page,
-					   int *total_mapcount)
+static inline int page_trans_huge_mapcount(struct page *page)
 {
-	int mapcount = page_mapcount(page);
-	if (total_mapcount)
-		*total_mapcount = mapcount;
-	return mapcount;
+	return page_mapcount(page);
 }
 #endif
 
diff --git a/include/linux/swap.h b/include/linux/swap.h
index bdccbf1efa61..1d38d9475c4d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -681,7 +681,7 @@ static inline int swp_swapcount(swp_entry_t entry)
 }
 
 #define reuse_swap_page(page) \
-	(page_trans_huge_mapcount(page, NULL) == 1)
+	(page_trans_huge_mapcount(page) == 1)
 
 static inline int try_to_free_swap(struct page *page)
 {
-- 
cgit v1.2.3


From a421ef303008b0ceee2cfc625c3246fa7654b0ca Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 14 Jan 2022 14:07:07 -0800
Subject: mm: allow !GFP_KERNEL allocations for kvmalloc

Support for GFP_NO{FS,IO} and __GFP_NOFAIL has been implemented by
previous patches so we can allow the support for kvmalloc.  This will
allow some external users to simplify or completely remove their
helpers.

GFP_NOWAIT semantic hasn't been supported so far but it hasn't been
explicitly documented so let's add a note about that.

ceph_kvmalloc is the first helper to be dropped and changed to kvmalloc.

Link: https://lkml.kernel.org/r/20211122153233.9924-5-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Ilya Dryomov <idryomov@gmail.com>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Neil Brown <neilb@suse.de>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ceph/libceph.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 409d8c29bc4f..309acbcb5a8a 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -295,7 +295,6 @@ extern bool libceph_compatible(void *data);
 
 extern const char *ceph_msg_type_name(int type);
 extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
-extern void *ceph_kvmalloc(size_t size, gfp_t flags);
 
 struct fs_parameter;
 struct fc_log;
-- 
cgit v1.2.3


From 4034247a0d6ab281ba3293798ce67af494d86129 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 14 Jan 2022 14:07:14 -0800
Subject: mm: introduce memalloc_retry_wait()

Various places in the kernel - largely in filesystems - respond to a
memory allocation failure by looping around and re-trying.  Some of
these cannot conveniently use __GFP_NOFAIL, for reasons such as:

 - a GFP_ATOMIC allocation, which __GFP_NOFAIL doesn't work on
 - a need to check for the process being signalled between failures
 - the possibility that other recovery actions could be performed
 - the allocation is quite deep in support code, and passing down an
   extra flag to say if __GFP_NOFAIL is wanted would be clumsy.

Many of these currently use congestion_wait() which (in almost all
cases) simply waits the given timeout - congestion isn't tracked for
most devices.

It isn't clear what the best delay is for loops, but it is clear that
the various filesystems shouldn't be responsible for choosing a timeout.

This patch introduces memalloc_retry_wait() with takes on that
responsibility.  Code that wants to retry a memory allocation can call
this function passing the GFP flags that were used.  It will wait
however is appropriate.

For now, it only considers __GFP_NORETRY and whatever
gfpflags_allow_blocking() tests.  If blocking is allowed without
__GFP_NORETRY, then alloc_page either made some reclaim progress, or
waited for a while, before failing.  So there is no need for much
further waiting.  memalloc_retry_wait() will wait until the current
jiffie ends.  If this condition is not met, then alloc_page() won't have
waited much if at all.  In that case memalloc_retry_wait() waits about
200ms.  This is the delay that most current loops uses.

linux/sched/mm.h needs to be included in some files now,
but linux/backing-dev.h does not.

Link: https://lkml.kernel.org/r/163754371968.13692.1277530886009912421@noble.neil.brown.name
Signed-off-by: NeilBrown <neilb@suse.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Chao Yu <chao@kernel.org>
Cc: Darrick J. Wong <djwong@kernel.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched/mm.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index aca874d33fe6..aa5f09ca5bcf 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -214,6 +214,32 @@ static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
 static inline void fs_reclaim_release(gfp_t gfp_mask) { }
 #endif
 
+/* Any memory-allocation retry loop should use
+ * memalloc_retry_wait(), and pass the flags for the most
+ * constrained allocation attempt that might have failed.
+ * This provides useful documentation of where loops are,
+ * and a central place to fine tune the waiting as the MM
+ * implementation changes.
+ */
+static inline void memalloc_retry_wait(gfp_t gfp_flags)
+{
+	/* We use io_schedule_timeout because waiting for memory
+	 * typically included waiting for dirty pages to be
+	 * written out, which requires IO.
+	 */
+	__set_current_state(TASK_UNINTERRUPTIBLE);
+	gfp_flags = current_gfp_context(gfp_flags);
+	if (gfpflags_allow_blocking(gfp_flags) &&
+	    !(gfp_flags & __GFP_NORETRY))
+		/* Probably waited already, no need for much more */
+		io_schedule_timeout(1);
+	else
+		/* Probably didn't wait, and has now released a lock,
+		 * so now is a good time to wait
+		 */
+		io_schedule_timeout(HZ/50);
+}
+
 /**
  * might_alloc - Mark possible allocation sites
  * @gfp_mask: gfp_t flags that would be used to allocate
-- 
cgit v1.2.3


From 1611f74a94ba2e0f2d25b75008ed8e76e122097a Mon Sep 17 00:00:00 2001
From: Changcheng Deng <deng.changcheng@zte.com.cn>
Date: Fri, 14 Jan 2022 14:07:21 -0800
Subject: mm: fix boolreturn.cocci warning

Return statements in functions returning bool should use true/false
instead of 1/0.

Link: https://lkml.kernel.org/r/20211126073327.74815-1-deng.changcheng@zte.com.cn
Signed-off-by: Changcheng Deng <deng.changcheng@zte.com.cn>
Reported-by: Zeal Robot <zealci@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index b5f14d581113..18423c2157e8 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -383,7 +383,7 @@ static __always_inline int TestClearPage##uname(struct page *page)	\
 	TESTCLEARFLAG(uname, lname, policy)
 
 #define TESTPAGEFLAG_FALSE(uname, lname)				\
-static inline bool folio_test_##lname(const struct folio *folio) { return 0; } \
+static inline bool folio_test_##lname(const struct folio *folio) { return false; } \
 static inline int Page##uname(const struct page *page) { return 0; }
 
 #define SETPAGEFLAG_NOOP(uname, lname)					\
-- 
cgit v1.2.3


From be1a13eb51077b2ec5f7f4306f93dfece503a3f1 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 14 Jan 2022 14:07:27 -0800
Subject: mm: drop node from alloc_pages_vma

alloc_pages_vma is meant to allocate a page with a vma specific memory
policy.  The initial node parameter is always a local node so it is
pointless to waste a function argument for this.  Drop the parameter.

Link: https://lkml.kernel.org/r/YaSnlv4QpryEpesG@dhcp22.suse.cz
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Ben Widawsky <ben.widawsky@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Feng Tang <feng.tang@intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 8fcc38467af6..78b58448f796 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -598,9 +598,9 @@ struct page *alloc_pages(gfp_t gfp, unsigned int order);
 struct folio *folio_alloc(gfp_t gfp, unsigned order);
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
-			int node, bool hugepage);
+			bool hugepage);
 #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
-	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
+	alloc_pages_vma(gfp_mask, order, vma, addr, true)
 #else
 static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order)
 {
@@ -610,14 +610,14 @@ static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order)
 {
 	return __folio_alloc_node(gfp, order, numa_node_id());
 }
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, false)\
 	alloc_pages(gfp_mask, order)
 #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
 	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr)			\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, false)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
-- 
cgit v1.2.3


From 04a536bfbd0f885338eecc2a4503dfca50ac94dd Mon Sep 17 00:00:00 2001
From: Miles Chen <miles.chen@mediatek.com>
Date: Fri, 14 Jan 2022 14:07:30 -0800
Subject: include/linux/gfp.h: further document GFP_DMA32

kmalloc(..., GFP_DMA32) does not return DMA32 memory because the DMA32
kmalloc cache array is not implemented.  (Reason: there is no such user
in kernel).

Put a short comment about this so people can understand this by reading
the comment.

[1] https://lists.linuxfoundation.org/pipermail/iommu/2018-December/031696.html

Link: https://lkml.kernel.org/r/20211207093610.6406-1-miles.chen@mediatek.com
Signed-off-by: Miles Chen <miles.chen@mediatek.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 78b58448f796..80f63c862be5 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -302,7 +302,9 @@ struct vm_area_struct;
  * lowest zone as a type of emergency reserve.
  *
  * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit
- * address.
+ * address. Note that kmalloc(..., GFP_DMA32) does not return DMA32 memory
+ * because the DMA32 kmalloc cache array is not implemented.
+ * (Reason: there is no such user in kernel).
  *
  * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
  * do not need to be directly accessible by the kernel but that cannot
-- 
cgit v1.2.3


From 62b3107073646e0946bd97ff926832bafb846d17 Mon Sep 17 00:00:00 2001
From: Baoquan He <bhe@redhat.com>
Date: Fri, 14 Jan 2022 14:07:37 -0800
Subject: mm_zone: add function to check if managed dma zone exists

Patch series "Handle warning of allocation failure on DMA zone w/o
managed pages", v4.

**Problem observed:
On x86_64, when crash is triggered and entering into kdump kernel, page
allocation failure can always be seen.

 ---------------------------------
 DMA: preallocated 128 KiB GFP_KERNEL pool for atomic allocations
 swapper/0: page allocation failure: order:5, mode:0xcc1(GFP_KERNEL|GFP_DMA), nodemask=(null),cpuset=/,mems_allowed=0
 CPU: 0 PID: 1 Comm: swapper/0
 Call Trace:
  dump_stack+0x7f/0xa1
  warn_alloc.cold+0x72/0xd6
  ......
  __alloc_pages+0x24d/0x2c0
  ......
  dma_atomic_pool_init+0xdb/0x176
  do_one_initcall+0x67/0x320
  ? rcu_read_lock_sched_held+0x3f/0x80
  kernel_init_freeable+0x290/0x2dc
  ? rest_init+0x24f/0x24f
  kernel_init+0xa/0x111
  ret_from_fork+0x22/0x30
 Mem-Info:
 ------------------------------------

***Root cause:
In the current kernel, it assumes that DMA zone must have managed pages
and try to request pages if CONFIG_ZONE_DMA is enabled. While this is not
always true. E.g in kdump kernel of x86_64, only low 1M is presented and
locked down at very early stage of boot, so that this low 1M won't be
added into buddy allocator to become managed pages of DMA zone. This
exception will always cause page allocation failure if page is requested
from DMA zone.

***Investigation:
This failure happens since below commit merged into linus's tree.
  1a6a9044b967 x86/setup: Remove CONFIG_X86_RESERVE_LOW and reservelow= options
  23721c8e92f7 x86/crash: Remove crash_reserve_low_1M()
  f1d4d47c5851 x86/setup: Always reserve the first 1M of RAM
  7c321eb2b843 x86/kdump: Remove the backup region handling
  6f599d84231f x86/kdump: Always reserve the low 1M when the crashkernel option is specified

Before them, on x86_64, the low 640K area will be reused by kdump kernel.
So in kdump kernel, the content of low 640K area is copied into a backup
region for dumping before jumping into kdump. Then except of those firmware
reserved region in [0, 640K], the left area will be added into buddy
allocator to become available managed pages of DMA zone.

However, after above commits applied, in kdump kernel of x86_64, the low
1M is reserved by memblock, but not released to buddy allocator. So any
later page allocation requested from DMA zone will fail.

At the beginning, if crashkernel is reserved, the low 1M need be locked
down because AMD SME encrypts memory making the old backup region
mechanims impossible when switching into kdump kernel.

Later, it was also observed that there are BIOSes corrupting memory
under 1M. To solve this, in commit f1d4d47c5851, the entire region of
low 1M is always reserved after the real mode trampoline is allocated.

Besides, recently, Intel engineer mentioned their TDX (Trusted domain
extensions) which is under development in kernel also needs to lock down
the low 1M. So we can't simply revert above commits to fix the page allocation
failure from DMA zone as someone suggested.

***Solution:
Currently, only DMA atomic pool and dma-kmalloc will initialize and
request page allocation with GFP_DMA during bootup.

So only initializ DMA atomic pool when DMA zone has available managed
pages, otherwise just skip the initialization.

For dma-kmalloc(), for the time being, let's mute the warning of
allocation failure if requesting pages from DMA zone while no manged
pages.  Meanwhile, change code to use dma_alloc_xx/dma_map_xx API to
replace kmalloc(GFP_DMA), or do not use GFP_DMA when calling kmalloc() if
not necessary.  Christoph is posting patches to fix those under
drivers/scsi/.  Finally, we can remove the need of dma-kmalloc() as people
suggested.

This patch (of 3):

In some places of the current kernel, it assumes that dma zone must have
managed pages if CONFIG_ZONE_DMA is enabled.  While this is not always
true.  E.g in kdump kernel of x86_64, only low 1M is presented and locked
down at very early stage of boot, so that there's no managed pages at all
in DMA zone.  This exception will always cause page allocation failure if
page is requested from DMA zone.

Here add function has_managed_dma() and the relevant helper functions to
check if there's DMA zone with managed pages.  It will be used in later
patches.

Link: https://lkml.kernel.org/r/20211223094435.248523-1-bhe@redhat.com
Link: https://lkml.kernel.org/r/20211223094435.248523-2-bhe@redhat.com
Fixes: 6f599d84231f ("x86/kdump: Always reserve the low 1M when the crashkernel option is specified")
Signed-off-by: Baoquan He <bhe@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: John Donnelly  <john.p.donnelly@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Christoph Lameter <cl@linux.com>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Laight <David.Laight@ACULAB.COM>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 936dc0b6c226..aed44e9b5d89 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1047,6 +1047,15 @@ static inline int is_highmem_idx(enum zone_type idx)
 #endif
 }
 
+#ifdef CONFIG_ZONE_DMA
+bool has_managed_dma(void);
+#else
+static inline bool has_managed_dma(void)
+{
+	return false;
+}
+#endif
+
 /**
  * is_highmem - helper function to quickly check if a struct zone is a
  *              highmem zone or not.  This is an attempt to keep references
-- 
cgit v1.2.3


From f47761999052b1cc987dd3e3d3adf47997358fc0 Mon Sep 17 00:00:00 2001
From: Mina Almasry <almasrymina@google.com>
Date: Fri, 14 Jan 2022 14:07:48 -0800
Subject: hugetlb: add hugetlb.*.numa_stat file

For hugetlb backed jobs/VMs it's critical to understand the numa
information for the memory backing these jobs to deliver optimal
performance.

Currently this technically can be queried from /proc/self/numa_maps, but
there are significant issues with that.  Namely:

1. Memory can be mapped or unmapped.

2. numa_maps are per process and need to be aggregated across all
   processes in the cgroup.  For shared memory this is more involved as
   the userspace needs to make sure it doesn't double count shared
   mappings.

3. I believe querying numa_maps needs to hold the mmap_lock which adds
   to the contention on this lock.

For these reasons I propose simply adding hugetlb.*.numa_stat file,
   which shows the numa information of the cgroup similarly to
   memory.numa_stat.

On cgroup-v2:
   cat /sys/fs/cgroup/unified/test/hugetlb.2MB.numa_stat
   total=2097152 N0=2097152 N1=0

On cgroup-v1:
   cat /sys/fs/cgroup/hugetlb/test/hugetlb.2MB.numa_stat
   total=2097152 N0=2097152 N1=0
   hierarichal_total=2097152 N0=2097152 N1=0

This patch was tested manually by allocating hugetlb memory and querying
the hugetlb.*.numa_stat file of the cgroup and its parents.

[colin.i.king@googlemail.com: fix spelling mistake "hierarichal" -> "hierarchical"]
  Link: https://lkml.kernel.org/r/20211125090635.23508-1-colin.i.king@gmail.com
[keescook@chromium.org: fix copy/paste array assignment]
  Link: https://lkml.kernel.org/r/20211203065647.2819707-1-keescook@chromium.org

Link: https://lkml.kernel.org/r/20211123001020.4083653-1-almasrymina@google.com
Signed-off-by: Mina Almasry <almasrymina@google.com>
Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Jue Wang <juew@google.com>
Cc: Yang Yao <ygyao@google.com>
Cc: Joanna Li <joannali@google.com>
Cc: Cannon Matthews <cannonmatthews@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h        | 4 ++--
 include/linux/hugetlb_cgroup.h | 7 +++++++
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 00351ccb49a3..d1897a69c540 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -622,8 +622,8 @@ struct hstate {
 #endif
 #ifdef CONFIG_CGROUP_HUGETLB
 	/* cgroup control files */
-	struct cftype cgroup_files_dfl[7];
-	struct cftype cgroup_files_legacy[9];
+	struct cftype cgroup_files_dfl[8];
+	struct cftype cgroup_files_legacy[10];
 #endif
 	char name[HSTATE_NAME_LEN];
 };
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index ba025ae27882..379344828e78 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -36,6 +36,11 @@ enum hugetlb_memory_event {
 	HUGETLB_NR_MEMORY_EVENTS,
 };
 
+struct hugetlb_cgroup_per_node {
+	/* hugetlb usage in pages over all hstates. */
+	unsigned long usage[HUGE_MAX_HSTATE];
+};
+
 struct hugetlb_cgroup {
 	struct cgroup_subsys_state css;
 
@@ -57,6 +62,8 @@ struct hugetlb_cgroup {
 
 	/* Handle for "hugetlb.events.local" */
 	struct cgroup_file events_local_file[HUGE_MAX_HSTATE];
+
+	struct hugetlb_cgroup_per_node *nodeinfo[];
 };
 
 static inline struct hugetlb_cgroup *
-- 
cgit v1.2.3


From e9ea874a8ffb0f8ebed4f4981531a32c5b663d79 Mon Sep 17 00:00:00 2001
From: Yang Yang <yang.yang29@zte.com.cn>
Date: Fri, 14 Jan 2022 14:07:55 -0800
Subject: mm/vmstat: add events for THP max_ptes_* exceeds

There are interfaces to adjust max_ptes_none, max_ptes_swap,
max_ptes_shared values, see
  /sys/kernel/mm/transparent_hugepage/khugepaged/.

But system administrator may not know which value is the best.  So Add
those events to support adjusting max_ptes_* to suitable values.

For example, if default max_ptes_swap value causes too much failures,
and system uses zram whose IO is fast, administrator could increase
max_ptes_swap until THP_SCAN_EXCEED_SWAP_PTE not increase anymore.

Link: https://lkml.kernel.org/r/20211225094036.574157-1-yang.yang29@zte.com.cn
Signed-off-by: Yang Yang <yang.yang29@zte.com.cn>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Saravanan D <saravanand@fb.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vm_event_item.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index a185cc75ff52..7b2363388bfa 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -98,6 +98,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		THP_SPLIT_PAGE_FAILED,
 		THP_DEFERRED_SPLIT_PAGE,
 		THP_SPLIT_PMD,
+		THP_SCAN_EXCEED_NONE_PTE,
+		THP_SCAN_EXCEED_SWAP_PTE,
+		THP_SCAN_EXCEED_SHARED_PTE,
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 		THP_SPLIT_PUD,
 #endif
-- 
cgit v1.2.3


From e4b424b7ec8791087375bb1f2480a3ba05d21e0b Mon Sep 17 00:00:00 2001
From: Gang Li <ligang.bdlg@bytedance.com>
Date: Fri, 14 Jan 2022 14:08:07 -0800
Subject: vmscan: make drop_slab_node static

drop_slab_node is only used in drop_slab.  So remove it's declaration
from header file and add keyword static for it's definition.

Link: https://lkml.kernel.org/r/20211111062445.5236-1-ligang.bdlg@bytedance.com
Signed-off-by: Gang Li <ligang.bdlg@bytedance.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index cef65f9cbdf2..eb67eb699b78 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3122,7 +3122,6 @@ int drop_caches_sysctl_handler(struct ctl_table *, int, void *, size_t *,
 #endif
 
 void drop_slab(void);
-void drop_slab_node(int nid);
 
 #ifndef CONFIG_MMU
 #define randomize_va_space 0
-- 
cgit v1.2.3


From c6018b4b254971863bd0ad36bb5e7d0fa0f0ddb0 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Fri, 14 Jan 2022 14:08:17 -0800
Subject: mm/mempolicy: add set_mempolicy_home_node syscall

This syscall can be used to set a home node for the MPOL_BIND and
MPOL_PREFERRED_MANY memory policy.  Users should use this syscall after
setting up a memory policy for the specified range as shown below.

  mbind(p, nr_pages * page_size, MPOL_BIND, new_nodes->maskp,
        new_nodes->size + 1, 0);
  sys_set_mempolicy_home_node((unsigned long)p, nr_pages * page_size,
				home_node, 0);

The syscall allows specifying a home node/preferred node from which
kernel will fulfill memory allocation requests first.

For address range with MPOL_BIND memory policy, if nodemask specifies
more than one node, page allocations will come from the node in the
nodemask with sufficient free memory that is closest to the home
node/preferred node.

For MPOL_PREFERRED_MANY if the nodemask specifies more than one node,
page allocation will come from the node in the nodemask with sufficient
free memory that is closest to the home node/preferred node.  If there
is not enough memory in all the nodes specified in the nodemask, the
allocation will be attempted from the closest numa node to the home node
in the system.

This helps applications to hint at a memory allocation preference node
and fallback to _only_ a set of nodes if the memory is not available on
the preferred node.  Fallback allocation is attempted from the node
which is nearest to the preferred node.

This helps applications to have control on memory allocation numa nodes
and avoids default fallback to slow memory NUMA nodes.  For example a
system with NUMA nodes 1,2 and 3 with DRAM memory and 10, 11 and 12 of
slow memory

 new_nodes = numa_bitmask_alloc(nr_nodes);

 numa_bitmask_setbit(new_nodes, 1);
 numa_bitmask_setbit(new_nodes, 2);
 numa_bitmask_setbit(new_nodes, 3);

 p = mmap(NULL, nr_pages * page_size, protflag, mapflag, -1, 0);
 mbind(p, nr_pages * page_size, MPOL_BIND, new_nodes->maskp,  new_nodes->size + 1, 0);

 sys_set_mempolicy_home_node(p, nr_pages * page_size, 2, 0);

This will allocate from nodes closer to node 2 and will make sure the
kernel will only allocate from nodes 1, 2, and 3.  Memory will not be
allocated from slow memory nodes 10, 11, and 12.  This differs from
default MPOL_BIND behavior in that with default MPOL_BIND the allocation
will be attempted from node closer to the local node.  One of the
reasons to specify a home node is to allow allocations from cpu less
NUMA node and its nearby NUMA nodes.

With MPOL_PREFERRED_MANY on the other hand will first try to allocate
from the closest node to node 2 from the node list 1, 2 and 3.  If those
nodes don't have enough memory, kernel will allocate from slow memory
node 10, 11 and 12 which ever is closer to node 2.

Link: https://lkml.kernel.org/r/20211202123810.267175-3-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Ben Widawsky <ben.widawsky@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Feng Tang <feng.tang@intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: <linux-api@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 3c7595e81150..668389b4b53d 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -46,6 +46,7 @@ struct mempolicy {
 	unsigned short mode; 	/* See MPOL_* above */
 	unsigned short flags;	/* See set_mempolicy() MPOL_F_* above */
 	nodemask_t nodes;	/* interleave/bind/perfer */
+	int home_node;		/* Home node to use for MPOL_BIND and MPOL_PREFERRED_MANY */
 
 	union {
 		nodemask_t cpuset_mems_allowed;	/* relative to these nodes */
-- 
cgit v1.2.3


From 21b084fdf2a49ca1634e8e360e9ab6f9ff0dee11 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Fri, 14 Jan 2022 14:08:21 -0800
Subject: mm/mempolicy: wire up syscall set_mempolicy_home_node

Link: https://lkml.kernel.org/r/20211202123810.267175-4-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Ben Widawsky <ben.widawsky@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Feng Tang <feng.tang@intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: <linux-api@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/syscalls.h          | 3 +++
 include/uapi/asm-generic/unistd.h | 5 ++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 528a478dbda8..819c0cb00b6d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1057,6 +1057,9 @@ asmlinkage long sys_landlock_add_rule(int ruleset_fd, enum landlock_rule_type ru
 		const void __user *rule_attr, __u32 flags);
 asmlinkage long sys_landlock_restrict_self(int ruleset_fd, __u32 flags);
 asmlinkage long sys_memfd_secret(unsigned int flags);
+asmlinkage long sys_set_mempolicy_home_node(unsigned long start, unsigned long len,
+					    unsigned long home_node,
+					    unsigned long flags);
 
 /*
  * Architecture-specific system calls
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 4557a8b6086f..1c48b0ae3ba3 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -883,8 +883,11 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
 #define __NR_futex_waitv 449
 __SYSCALL(__NR_futex_waitv, sys_futex_waitv)
 
+#define __NR_set_mempolicy_home_node 450
+__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
+
 #undef __NR_syscalls
-#define __NR_syscalls 450
+#define __NR_syscalls 451
 
 /*
  * 32 bit systems traditionally used different
-- 
cgit v1.2.3


From 84b328aa81216e08804d8875d63f26bda1298788 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Fri, 14 Jan 2022 14:08:40 -0800
Subject: mm: compaction: fix the migration stats in
 trace_mm_compaction_migratepages()

Now the migrate_pages() has changed to return the number of {normal
page, THP, hugetlb} instead, thus we should not use the return value to
calculate the number of pages migrated successfully.  Instead we can
just use the 'nr_succeeded' which indicates the number of normal pages
migrated successfully to calculate the non-migrated pages in
trace_mm_compaction_migratepages().

Link: https://lkml.kernel.org/r/b4225251c4bec068dcd90d275ab7de88a39e2bd7.1636275127.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/trace/events/compaction.h | 24 ++++--------------------
 1 file changed, 4 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index 54e5bf081171..7d48e7079e48 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -68,10 +68,9 @@ DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_freepages,
 TRACE_EVENT(mm_compaction_migratepages,
 
 	TP_PROTO(unsigned long nr_all,
-		int migrate_rc,
-		struct list_head *migratepages),
+		unsigned int nr_succeeded),
 
-	TP_ARGS(nr_all, migrate_rc, migratepages),
+	TP_ARGS(nr_all, nr_succeeded),
 
 	TP_STRUCT__entry(
 		__field(unsigned long, nr_migrated)
@@ -79,23 +78,8 @@ TRACE_EVENT(mm_compaction_migratepages,
 	),
 
 	TP_fast_assign(
-		unsigned long nr_failed = 0;
-		struct list_head *page_lru;
-
-		/*
-		 * migrate_pages() returns either a non-negative number
-		 * with the number of pages that failed migration, or an
-		 * error code, in which case we need to count the remaining
-		 * pages manually
-		 */
-		if (migrate_rc >= 0)
-			nr_failed = migrate_rc;
-		else
-			list_for_each(page_lru, migratepages)
-				nr_failed++;
-
-		__entry->nr_migrated = nr_all - nr_failed;
-		__entry->nr_failed = nr_failed;
+		__entry->nr_migrated = nr_succeeded;
+		__entry->nr_failed = nr_all - nr_succeeded;
 	),
 
 	TP_printk("nr_migrated=%lu nr_failed=%lu",
-- 
cgit v1.2.3


From c0e582de6066e97c83a466f0e5983e3148123526 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Fri, 14 Jan 2022 14:08:56 -0800
Subject: mm/thp: drop unused trace events hugepage_[invalidate|splitting]

The trace events hugepage_[invalidate|splitting], were added via the
commit 9e813308a5c1 ("powerpc/thp: Add tracepoints to track hugepage
invalidate").  Afterwards their call sites i.e
trace_hugepage_[invalidate|splitting] were just dropped off, leaving
these trace points unused.

Link: https://lkml.kernel.org/r/1641546351-15109-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/trace/events/thp.h | 35 -----------------------------------
 1 file changed, 35 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/thp.h b/include/trace/events/thp.h
index d7fbbe551841..ca3f2767828a 100644
--- a/include/trace/events/thp.h
+++ b/include/trace/events/thp.h
@@ -8,24 +8,6 @@
 #include <linux/types.h>
 #include <linux/tracepoint.h>
 
-TRACE_EVENT(hugepage_invalidate,
-
-	    TP_PROTO(unsigned long addr, unsigned long pte),
-	    TP_ARGS(addr, pte),
-	    TP_STRUCT__entry(
-		    __field(unsigned long, addr)
-		    __field(unsigned long, pte)
-		    ),
-
-	    TP_fast_assign(
-		    __entry->addr = addr;
-		    __entry->pte = pte;
-		    ),
-
-	    TP_printk("hugepage invalidate at addr 0x%lx and pte = 0x%lx",
-		      __entry->addr, __entry->pte)
-);
-
 TRACE_EVENT(hugepage_set_pmd,
 
 	    TP_PROTO(unsigned long addr, unsigned long pmd),
@@ -65,23 +47,6 @@ TRACE_EVENT(hugepage_update,
 
 	    TP_printk("hugepage update at addr 0x%lx and pte = 0x%lx clr = 0x%lx, set = 0x%lx", __entry->addr, __entry->pte, __entry->clr, __entry->set)
 );
-TRACE_EVENT(hugepage_splitting,
-
-	    TP_PROTO(unsigned long addr, unsigned long pte),
-	    TP_ARGS(addr, pte),
-	    TP_STRUCT__entry(
-		    __field(unsigned long, addr)
-		    __field(unsigned long, pte)
-		    ),
-
-	    TP_fast_assign(
-		    __entry->addr = addr;
-		    __entry->pte = pte;
-		    ),
-
-	    TP_printk("hugepage splitting at addr 0x%lx and pte = 0x%lx",
-		      __entry->addr, __entry->pte)
-);
 
 #endif /* _TRACE_THP_H */
 
-- 
cgit v1.2.3


From c9fdc4d5487a16bd1f003fc8b66e91f88efb50e6 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <naoya.horiguchi@nec.com>
Date: Fri, 14 Jan 2022 14:09:06 -0800
Subject: mm/hwpoison: remove MF_MSG_BUDDY_2ND and MF_MSG_POISONED_HUGE

These action_page_types are no longer used, so remove them.

Link: https://lkml.kernel.org/r/20211115084006.3728254-3-naoya.horiguchi@linux.dev
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Acked-by: Yang Shi <shy828301@gmail.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Ding Hui <dinghui@sangfor.com.cn>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Peter Xu <peterx@redhat.com>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h      | 2 --
 include/ras/ras_event.h | 2 --
 2 files changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index eb67eb699b78..7f594da84aca 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3201,7 +3201,6 @@ enum mf_action_page_type {
 	MF_MSG_KERNEL_HIGH_ORDER,
 	MF_MSG_SLAB,
 	MF_MSG_DIFFERENT_COMPOUND,
-	MF_MSG_POISONED_HUGE,
 	MF_MSG_HUGE,
 	MF_MSG_FREE_HUGE,
 	MF_MSG_NON_PMD_HUGE,
@@ -3216,7 +3215,6 @@ enum mf_action_page_type {
 	MF_MSG_CLEAN_LRU,
 	MF_MSG_TRUNCATED_LRU,
 	MF_MSG_BUDDY,
-	MF_MSG_BUDDY_2ND,
 	MF_MSG_DAX,
 	MF_MSG_UNSPLIT_THP,
 	MF_MSG_UNKNOWN,
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 0bdbc0d17d2f..d0337a41141c 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -358,7 +358,6 @@ TRACE_EVENT(aer_event,
 	EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" )	\
 	EM ( MF_MSG_SLAB, "kernel slab page" )				\
 	EM ( MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking" ) \
-	EM ( MF_MSG_POISONED_HUGE, "huge page already hardware poisoned" )	\
 	EM ( MF_MSG_HUGE, "huge page" )					\
 	EM ( MF_MSG_FREE_HUGE, "free huge page" )			\
 	EM ( MF_MSG_NON_PMD_HUGE, "non-pmd-sized huge page" )		\
@@ -373,7 +372,6 @@ TRACE_EVENT(aer_event,
 	EM ( MF_MSG_CLEAN_LRU, "clean LRU page" )			\
 	EM ( MF_MSG_TRUNCATED_LRU, "already truncated LRU page" )	\
 	EM ( MF_MSG_BUDDY, "free buddy page" )				\
-	EM ( MF_MSG_BUDDY_2ND, "free buddy page (2nd try)" )		\
 	EM ( MF_MSG_DAX, "dax page" )					\
 	EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" )			\
 	EMe ( MF_MSG_UNKNOWN, "unknown page" )
-- 
cgit v1.2.3


From bf181c582588f8f7406d52f2ee228539b465f173 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <naoya.horiguchi@nec.com>
Date: Fri, 14 Jan 2022 14:09:09 -0800
Subject: mm/hwpoison: fix unpoison_memory()

After recent soft-offline rework, error pages can be taken off from
buddy allocator, but the existing unpoison_memory() does not properly
undo the operation.  Moreover, due to the recent change on
__get_hwpoison_page(), get_page_unless_zero() is hardly called for
hwpoisoned pages.  So __get_hwpoison_page() highly likely returns -EBUSY
(meaning to fail to grab page refcount) and unpoison just clears
PG_hwpoison without releasing a refcount.  That does not lead to a
critical issue like kernel panic, but unpoisoned pages never get back to
buddy (leaked permanently), which is not good.

To (partially) fix this, we need to identify "taken off" pages from
other types of hwpoisoned pages.  We can't use refcount or page flags
for this purpose, so a pseudo flag is defined by hacking ->private
field.  Someone might think that put_page() is enough to cancel
taken-off pages, but the normal free path contains some operations not
suitable for the current purpose, and can fire VM_BUG_ON().

Note that unpoison_memory() is now supposed to be cancel hwpoison events
injected only by madvise() or
/sys/devices/system/memory/{hard,soft}_offline_page, not by MCE
injection, so please don't try to use unpoison when testing with MCE
injection.

[lkp@intel.com: report build failure for ARCH=i386]

Link: https://lkml.kernel.org/r/20211115084006.3728254-4-naoya.horiguchi@linux.dev
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reviewed-by: Yang Shi <shy828301@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Ding Hui <dinghui@sangfor.com.cn>
Cc: Tony Luck <tony.luck@intel.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h         | 1 +
 include/linux/page-flags.h | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7f594da84aca..d4fb49a5d60d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3174,6 +3174,7 @@ enum mf_flags {
 	MF_ACTION_REQUIRED = 1 << 1,
 	MF_MUST_KILL = 1 << 2,
 	MF_SOFT_OFFLINE = 1 << 3,
+	MF_UNPOISON = 1 << 4,
 };
 extern int memory_failure(unsigned long pfn, int flags);
 extern void memory_failure_queue(unsigned long pfn, int flags);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 18423c2157e8..7e2b90dc7d3f 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -522,7 +522,11 @@ PAGEFLAG_FALSE(Uncached, uncached)
 PAGEFLAG(HWPoison, hwpoison, PF_ANY)
 TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
 #define __PG_HWPOISON (1UL << PG_hwpoison)
+#define MAGIC_HWPOISON	0x48575053U	/* HWPS */
+extern void SetPageHWPoisonTakenOff(struct page *page);
+extern void ClearPageHWPoisonTakenOff(struct page *page);
 extern bool take_page_off_buddy(struct page *page);
+extern bool put_page_back_buddy(struct page *page);
 #else
 PAGEFLAG_FALSE(HWPoison, hwpoison)
 #define __PG_HWPOISON 0
-- 
cgit v1.2.3


From 5ee2fa2f063649570c702164f47a558a3432dd9e Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Fri, 14 Jan 2022 14:09:16 -0800
Subject: mm/rmap: fix potential batched TLB flush race

In theory, the following race is possible for batched TLB flushing.

  CPU0                               CPU1
  ----                               ----
  shrink_page_list()
                                     unmap
                                       zap_pte_range()
                                         flush_tlb_batched_pending()
                                           flush_tlb_mm()
    try_to_unmap()
      set_tlb_ubc_flush_pending()
        mm->tlb_flush_batched = true
                                           mm->tlb_flush_batched = false

After the TLB is flushed on CPU1 via flush_tlb_mm() and before
mm->tlb_flush_batched is set to false, some PTE is unmapped on CPU0 and
the TLB flushing is pended.  Then the pended TLB flushing will be lost.
Although both set_tlb_ubc_flush_pending() and
flush_tlb_batched_pending() are called with PTL locked, different PTL
instances may be used.

Because the race window is really small, and the lost TLB flushing will
cause problem only if a TLB entry is inserted before the unmapping in
the race window, the race is only theoretical.  But the fix is simple
and cheap too.

Syzbot has reported this too as follows:

    ==================================================================
    BUG: KCSAN: data-race in flush_tlb_batched_pending / try_to_unmap_one

    write to 0xffff8881072cfbbc of 1 bytes by task 17406 on cpu 1:
     flush_tlb_batched_pending+0x5f/0x80 mm/rmap.c:691
     madvise_free_pte_range+0xee/0x7d0 mm/madvise.c:594
     walk_pmd_range mm/pagewalk.c:128 [inline]
     walk_pud_range mm/pagewalk.c:205 [inline]
     walk_p4d_range mm/pagewalk.c:240 [inline]
     walk_pgd_range mm/pagewalk.c:277 [inline]
     __walk_page_range+0x981/0x1160 mm/pagewalk.c:379
     walk_page_range+0x131/0x300 mm/pagewalk.c:475
     madvise_free_single_vma mm/madvise.c:734 [inline]
     madvise_dontneed_free mm/madvise.c:822 [inline]
     madvise_vma mm/madvise.c:996 [inline]
     do_madvise+0xe4a/0x1140 mm/madvise.c:1202
     __do_sys_madvise mm/madvise.c:1228 [inline]
     __se_sys_madvise mm/madvise.c:1226 [inline]
     __x64_sys_madvise+0x5d/0x70 mm/madvise.c:1226
     do_syscall_x64 arch/x86/entry/common.c:50 [inline]
     do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
     entry_SYSCALL_64_after_hwframe+0x44/0xae

    write to 0xffff8881072cfbbc of 1 bytes by task 71 on cpu 0:
     set_tlb_ubc_flush_pending mm/rmap.c:636 [inline]
     try_to_unmap_one+0x60e/0x1220 mm/rmap.c:1515
     rmap_walk_anon+0x2fb/0x470 mm/rmap.c:2301
     try_to_unmap+0xec/0x110
     shrink_page_list+0xe91/0x2620 mm/vmscan.c:1719
     shrink_inactive_list+0x3fb/0x730 mm/vmscan.c:2394
     shrink_list mm/vmscan.c:2621 [inline]
     shrink_lruvec+0x3c9/0x710 mm/vmscan.c:2940
     shrink_node_memcgs+0x23e/0x410 mm/vmscan.c:3129
     shrink_node+0x8f6/0x1190 mm/vmscan.c:3252
     kswapd_shrink_node mm/vmscan.c:4022 [inline]
     balance_pgdat+0x702/0xd30 mm/vmscan.c:4213
     kswapd+0x200/0x340 mm/vmscan.c:4473
     kthread+0x2c7/0x2e0 kernel/kthread.c:327
     ret_from_fork+0x1f/0x30

    value changed: 0x01 -> 0x00

    Reported by Kernel Concurrency Sanitizer on:
    CPU: 0 PID: 71 Comm: kswapd0 Not tainted 5.16.0-rc1-syzkaller #0
    Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
    ==================================================================

[akpm@linux-foundation.org: tweak comments]

Link: https://lkml.kernel.org/r/20211201021104.126469-1-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Reported-by: syzbot+aa5bebed695edaccf0df@syzkaller.appspotmail.com
Cc: Nadav Amit <namit@vmware.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Marco Elver <elver@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6a89f128c990..e3b0476a4fda 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -647,7 +647,7 @@ struct mm_struct {
 		atomic_t tlb_flush_pending;
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 		/* See flush_tlb_batched_pending() */
-		bool tlb_flush_batched;
+		atomic_t tlb_flush_batched;
 #endif
 		struct uprobes_state uprobes_state;
 #ifdef CONFIG_PREEMPT_RT
-- 
cgit v1.2.3


From cab0a7c115546a4865fb7439558af9077a569574 Mon Sep 17 00:00:00 2001
From: Ting Liu <liuting.0x7c00@bytedance.com>
Date: Fri, 14 Jan 2022 14:09:28 -0800
Subject: mm: make some vars and functions static or __init

"page_idle_ops" as a global var, but its scope of use within this
document.  So it should be static.

"page_ext_ops" is a var used in the kernel initial phase.  And other
functions are aslo used in the kernel initial phase.  So they should be
__init or __initdata to reclaim memory.

Link: https://lkml.kernel.org/r/20211217095023.67293-1-liuting.0x7c00@bytedance.com
Signed-off-by: Ting Liu <liuting.0x7c00@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_idle.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
index 83abf95e9fa7..4663dfed1293 100644
--- a/include/linux/page_idle.h
+++ b/include/linux/page_idle.h
@@ -13,7 +13,6 @@
  * If there is not enough space to store Idle and Young bits in page flags, use
  * page ext flags instead.
  */
-extern struct page_ext_operations page_idle_ops;
 
 static inline bool folio_test_young(struct folio *folio)
 {
-- 
cgit v1.2.3


From c46b0bb6a735db0b6140e12e750b5acb1b032982 Mon Sep 17 00:00:00 2001
From: Xin Hao <xhao@linux.alibaba.com>
Date: Fri, 14 Jan 2022 14:09:37 -0800
Subject: mm/damon: add 'age' of region tracepoint support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In Damon, we can get age information by analyzing the nr_access change,
But short time sampling is not effective, we have to obtain enough data
for analysis through long time trace, this also means that we need to
consume more cpu resources and storage space.

Now the region add a new 'age' variable, we only need to get the change of
age value through a little time trace, for example, age has been
increasing to 141, but nr_access shows a value of 0 at the same time,
Through this，we can conclude that the region has a very low nr_access
value for a long time.

Link: https://lkml.kernel.org/r/b9def1262af95e0dc1d0caea447886434db01161.1636989871.git.xhao@linux.alibaba.com
Signed-off-by: Xin Hao <xhao@linux.alibaba.com>
Reviewed-by: SeongJae Park <sj@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/trace/events/damon.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h
index 2f422f4f1fb9..99ffa601e351 100644
--- a/include/trace/events/damon.h
+++ b/include/trace/events/damon.h
@@ -22,6 +22,7 @@ TRACE_EVENT(damon_aggregated,
 		__field(unsigned long, start)
 		__field(unsigned long, end)
 		__field(unsigned int, nr_accesses)
+		__field(unsigned int, age)
 	),
 
 	TP_fast_assign(
@@ -30,11 +31,13 @@ TRACE_EVENT(damon_aggregated,
 		__entry->start = r->ar.start;
 		__entry->end = r->ar.end;
 		__entry->nr_accesses = r->nr_accesses;
+		__entry->age = r->age;
 	),
 
-	TP_printk("target_id=%lu nr_regions=%u %lu-%lu: %u",
+	TP_printk("target_id=%lu nr_regions=%u %lu-%lu: %u %u",
 			__entry->target_id, __entry->nr_regions,
-			__entry->start, __entry->end, __entry->nr_accesses)
+			__entry->start, __entry->end,
+			__entry->nr_accesses, __entry->age)
 );
 
 #endif /* _TRACE_DAMON_H */
-- 
cgit v1.2.3


From cdeed009f3bceee41f73f0137db785fd29a05cb8 Mon Sep 17 00:00:00 2001
From: Xin Hao <xhao@linux.alibaba.com>
Date: Fri, 14 Jan 2022 14:09:44 -0800
Subject: mm/damon: remove some unneeded function definitions in damon.h

In damon.h some func definitions about VA & PA can only be used in its
own file, so there no need to define in the header file, and the header
file will look cleaner.

If other files later need these functions, the prototypes can be added
to damon.h at that time.

[sj@kernel.org: remove unnecessary function prototype position changes]
 Link: https://lkml.kernel.org/r/20211118114827.20052-1-sj@kernel.org

Link: https://lkml.kernel.org/r/45fd5b3ef6cce8e28dbc1c92f9dc845ccfc949d7.1636989871.git.xhao@linux.alibaba.com
Signed-off-by: Xin Hao <xhao@linux.alibaba.com>
Signed-off-by: SeongJae Park <sj@kernel.org>
Reviewed-by: SeongJae Park <sj@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h | 21 ---------------------
 1 file changed, 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index b4d4be3cc987..1d1be348f506 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -461,34 +461,13 @@ int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
 #endif	/* CONFIG_DAMON */
 
 #ifdef CONFIG_DAMON_VADDR
-
-/* Monitoring primitives for virtual memory address spaces */
-void damon_va_init(struct damon_ctx *ctx);
-void damon_va_update(struct damon_ctx *ctx);
-void damon_va_prepare_access_checks(struct damon_ctx *ctx);
-unsigned int damon_va_check_accesses(struct damon_ctx *ctx);
 bool damon_va_target_valid(void *t);
-void damon_va_cleanup(struct damon_ctx *ctx);
-int damon_va_apply_scheme(struct damon_ctx *context, struct damon_target *t,
-		struct damon_region *r, struct damos *scheme);
-int damon_va_scheme_score(struct damon_ctx *context, struct damon_target *t,
-		struct damon_region *r, struct damos *scheme);
 void damon_va_set_primitives(struct damon_ctx *ctx);
-
 #endif	/* CONFIG_DAMON_VADDR */
 
 #ifdef CONFIG_DAMON_PADDR
-
-/* Monitoring primitives for the physical memory address space */
-void damon_pa_prepare_access_checks(struct damon_ctx *ctx);
-unsigned int damon_pa_check_accesses(struct damon_ctx *ctx);
 bool damon_pa_target_valid(void *t);
-int damon_pa_apply_scheme(struct damon_ctx *context, struct damon_target *t,
-		struct damon_region *r, struct damos *scheme);
-int damon_pa_scheme_score(struct damon_ctx *context, struct damon_target *t,
-		struct damon_region *r, struct damos *scheme);
 void damon_pa_set_primitives(struct damon_ctx *ctx);
-
 #endif	/* CONFIG_DAMON_PADDR */
 
 #endif	/* _DAMON_H */
-- 
cgit v1.2.3


From 9b2a38d6ef25c1748e3964b0ff30a89e4ed26583 Mon Sep 17 00:00:00 2001
From: Xin Hao <xhao@linux.alibaba.com>
Date: Fri, 14 Jan 2022 14:09:53 -0800
Subject: mm/damon: move damon_rand() definition into damon.h

damon_rand() is called in three files:damon/core.c, damon/ paddr.c,
damon/vaddr.c, i think there is no need to redefine this twice, So move
it to damon.h will be a good choice.

Link: https://lkml.kernel.org/r/20211202075859.51341-1-xhao@linux.alibaba.com
Signed-off-by: Xin Hao <xhao@linux.alibaba.com>
Reviewed-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 1d1be348f506..3e91a597a1aa 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -11,12 +11,16 @@
 #include <linux/mutex.h>
 #include <linux/time64.h>
 #include <linux/types.h>
+#include <linux/random.h>
 
 /* Minimal region size.  Every damon_region is aligned by this. */
 #define DAMON_MIN_REGION	PAGE_SIZE
 /* Max priority score for DAMON-based operation schemes */
 #define DAMOS_MAX_SCORE		(99)
 
+/* Get a random number in [l, r) */
+#define damon_rand(l, r) (l + prandom_u32_max(r - l))
+
 /**
  * struct damon_addr_range - Represents an address region of [@start, @end).
  * @start:	Start address of the region (inclusive).
-- 
cgit v1.2.3


From 234d68732b6c135087bdebfa0630a43ae8c27758 Mon Sep 17 00:00:00 2001
From: Xin Hao <xhao@linux.alibaba.com>
Date: Fri, 14 Jan 2022 14:09:56 -0800
Subject: mm/damon: modify damon_rand() macro to static inline function

damon_rand() cannot be implemented as a macro.

Example:
	damon_rand(a++, b);

The value of 'a' will be incremented twice, This is obviously
unreasonable, So there fix it.

Link: https://lkml.kernel.org/r/110ffcd4e420c86c42b41ce2bc9f0fe6a4f32cd3.1638795127.git.xhao@linux.alibaba.com
Fixes: b9a6ac4e4ede ("mm/damon: adaptively adjust regions")
Signed-off-by: Xin Hao <xhao@linux.alibaba.com>
Reported-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 3e91a597a1aa..e2c8152985b7 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -19,7 +19,10 @@
 #define DAMOS_MAX_SCORE		(99)
 
 /* Get a random number in [l, r) */
-#define damon_rand(l, r) (l + prandom_u32_max(r - l))
+static inline unsigned long damon_rand(unsigned long l, unsigned long r)
+{
+	return l + prandom_u32_max(r - l);
+}
 
 /**
  * struct damon_addr_range - Represents an address region of [@start, @end).
-- 
cgit v1.2.3


From 88f86dcfa454784f7de550966c60fc78a3e95d6d Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 14 Jan 2022 14:09:59 -0800
Subject: mm/damon: convert macro functions to static inline functions

Patch series "mm/damon: Misc cleanups".

This patchset contains miscellaneous cleanups for DAMON's macro
functions and documentation.

This patch (of 6):

This commit converts macro functions in DAMON to static inline functions,
for better type checking, code documentation, etc[1].

[1] https://lore.kernel.org/linux-mm/20211202151213.6ec830863342220da4141bc5@linux-foundation.org/

Link: https://lkml.kernel.org/r/20211209131806.19317-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20211209131806.19317-2-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index e2c8152985b7..2dbc1f545da2 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -399,14 +399,20 @@ struct damon_ctx {
 	struct list_head schemes;
 };
 
-#define damon_next_region(r) \
-	(container_of(r->list.next, struct damon_region, list))
+static inline struct damon_region *damon_next_region(struct damon_region *r)
+{
+	return container_of(r->list.next, struct damon_region, list);
+}
 
-#define damon_prev_region(r) \
-	(container_of(r->list.prev, struct damon_region, list))
+static inline struct damon_region *damon_prev_region(struct damon_region *r)
+{
+	return container_of(r->list.prev, struct damon_region, list);
+}
 
-#define damon_last_region(t) \
-	(list_last_entry(&t->regions_list, struct damon_region, list))
+static inline struct damon_region *damon_last_region(struct damon_target *t)
+{
+	return list_last_entry(&t->regions_list, struct damon_region, list);
+}
 
 #define damon_for_each_region(r, t) \
 	list_for_each_entry(r, &t->regions_list, list)
-- 
cgit v1.2.3


From f4c6d22c6cf282ef7d24a724b9bd978ee2b74fc6 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 14 Jan 2022 14:10:14 -0800
Subject: mm/damon: remove a mistakenly added comment for a future feature

Due to a mistake in patches reordering, a comment for a future feature
called 'arbitrary monitoring target support'[1], which is still under
development, has added.  Because it only introduces confusion and we
don't have a plan to post the patches soon, this commit removes the
mistakenly added part.

[1] https://lore.kernel.org/linux-mm/20201215115448.25633-3-sjpark@amazon.com/

Link: https://lkml.kernel.org/r/20211209131806.19317-7-sj@kernel.org
Fixes: 1f366e421c8f ("mm/damon/core: implement DAMON-based Operation Schemes (DAMOS)")
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 2dbc1f545da2..97f4a224e950 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -281,7 +281,7 @@ struct damon_ctx;
  * as an integer in [0, &DAMOS_MAX_SCORE].
  * @apply_scheme is called from @kdamond when a region for user provided
  * DAMON-based operation scheme is found.  It should apply the scheme's action
- * to the region.  This is not used for &DAMON_ARBITRARY_TARGET case.
+ * to the region.
  * @target_valid should check whether the target is still valid for the
  * monitoring.
  * @cleanup is called from @kdamond just before its termination.
-- 
cgit v1.2.3


From 0e92c2ee9f459542c5384d9cfab24873c3dd6398 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 14 Jan 2022 14:10:17 -0800
Subject: mm/damon/schemes: account scheme actions that successfully applied

Patch series "mm/damon/schemes: Extend stats for better online analysis and tuning".

To help online access pattern analysis and tuning of DAMON-based
Operation Schemes (DAMOS), DAMOS provides simple statistics for each
scheme.  Introduction of DAMOS time/space quota further made the tuning
easier by making the risk management easier.  However, that also made
understanding of the working schemes a little bit more difficult.

For an example, progress of a given scheme can now be throttled by not
only the aggressiveness of the target access pattern, but also the
time/space quotas.  So, when a scheme is showing unexpectedly slow
progress, it's difficult to know by what the progress of the scheme is
throttled, with currently provided statistics.

This patchset extends the statistics to contain some metrics that can be
helpful for such online schemes analysis and tuning (patches 1-2),
exports those to users (patches 3 and 5), and add documents (patches 4
and 6).

This patch (of 6):

DAMON-based operation schemes (DAMOS) stats provide only the number and
the amount of regions that the action of the scheme has tried to be
applied.  Because the action could be failed for some reasons, the
currently provided information is sometimes not useful or convenient
enough for schemes profiling and tuning.  To improve this situation,
this commit extends the DAMOS stats to provide the number and the amount
of regions that the action has successfully applied.

Link: https://lkml.kernel.org/r/20211210150016.35349-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20211210150016.35349-2-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 97f4a224e950..e0ad3d9aaeed 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -192,6 +192,20 @@ struct damos_watermarks {
 	bool activated;
 };
 
+/**
+ * struct damos_stat - Statistics on a given scheme.
+ * @nr_tried:	Total number of regions that the scheme is tried to be applied.
+ * @sz_tried:	Total size of regions that the scheme is tried to be applied.
+ * @nr_applied:	Total number of regions that the scheme is applied.
+ * @sz_applied:	Total size of regions that the scheme is applied.
+ */
+struct damos_stat {
+	unsigned long nr_tried;
+	unsigned long sz_tried;
+	unsigned long nr_applied;
+	unsigned long sz_applied;
+};
+
 /**
  * struct damos - Represents a Data Access Monitoring-based Operation Scheme.
  * @min_sz_region:	Minimum size of target regions.
@@ -203,8 +217,7 @@ struct damos_watermarks {
  * @action:		&damo_action to be applied to the target regions.
  * @quota:		Control the aggressiveness of this scheme.
  * @wmarks:		Watermarks for automated (in)activation of this scheme.
- * @stat_count:		Total number of regions that this scheme is applied.
- * @stat_sz:		Total size of regions that this scheme is applied.
+ * @stat:		Statistics of this scheme.
  * @list:		List head for siblings.
  *
  * For each aggregation interval, DAMON finds regions which fit in the
@@ -235,8 +248,7 @@ struct damos {
 	enum damos_action action;
 	struct damos_quota quota;
 	struct damos_watermarks wmarks;
-	unsigned long stat_count;
-	unsigned long stat_sz;
+	struct damos_stat stat;
 	struct list_head list;
 };
 
@@ -281,7 +293,8 @@ struct damon_ctx;
  * as an integer in [0, &DAMOS_MAX_SCORE].
  * @apply_scheme is called from @kdamond when a region for user provided
  * DAMON-based operation scheme is found.  It should apply the scheme's action
- * to the region.
+ * to the region and return bytes of the region that the action is successfully
+ * applied.
  * @target_valid should check whether the target is still valid for the
  * monitoring.
  * @cleanup is called from @kdamond just before its termination.
@@ -295,8 +308,9 @@ struct damon_primitive {
 	int (*get_scheme_score)(struct damon_ctx *context,
 			struct damon_target *t, struct damon_region *r,
 			struct damos *scheme);
-	int (*apply_scheme)(struct damon_ctx *context, struct damon_target *t,
-			struct damon_region *r, struct damos *scheme);
+	unsigned long (*apply_scheme)(struct damon_ctx *context,
+			struct damon_target *t, struct damon_region *r,
+			struct damos *scheme);
 	bool (*target_valid)(void *target);
 	void (*cleanup)(struct damon_ctx *context);
 };
-- 
cgit v1.2.3


From 6268eac34ca30af7f6313504d556ec7fcd295621 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 14 Jan 2022 14:10:20 -0800
Subject: mm/damon/schemes: account how many times quota limit has exceeded

If the time/space quotas of a given DAMON-based operation scheme is too
small, the scheme could show unexpectedly slow progress.  However, there
is no good way to notice the case in runtime.  This commit extends the
DAMOS stat to provide how many times the quota limits exceeded so that
the users can easily notice the case and tune the scheme.

Link: https://lkml.kernel.org/r/20211210150016.35349-3-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index e0ad3d9aaeed..af648388e759 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -198,12 +198,14 @@ struct damos_watermarks {
  * @sz_tried:	Total size of regions that the scheme is tried to be applied.
  * @nr_applied:	Total number of regions that the scheme is applied.
  * @sz_applied:	Total size of regions that the scheme is applied.
+ * @qt_exceeds: Total number of times the quota of the scheme has exceeded.
  */
 struct damos_stat {
 	unsigned long nr_tried;
 	unsigned long sz_tried;
 	unsigned long nr_applied;
 	unsigned long sz_applied;
+	unsigned long qt_exceeds;
 };
 
 /**
-- 
cgit v1.2.3


From 2cd4b8e10cc31eadb5b10b1d73b3f28156f3776c Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <guoqing.jiang@linux.dev>
Date: Fri, 14 Jan 2022 14:10:38 -0800
Subject: mm/damon: move the implementation of damon_insert_region to damon.h

Usually, inline function is declared static since it should sit between
storage and type.  And implement it in a header file if used by multiple
files.

And this change also fixes compile issue when backport damon to 5.10.

  mm/damon/vaddr.c: In function `damon_va_evenly_split_region':
  ./include/linux/damon.h:425:13: error: inlining failed in call to `always_inline' `damon_insert_region': function body not available
  425 | inline void damon_insert_region(struct damon_region *r,
      | ^~~~~~~~~~~~~~~~~~~
  mm/damon/vaddr.c:86:3: note: called from here
  86 | damon_insert_region(n, r, next, t);
     | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Link: https://lkml.kernel.org/r/20211223085703.6142-1-guoqing.jiang@linux.dev
Signed-off-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Reviewed-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index af648388e759..5e1e3a128b77 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -451,9 +451,18 @@ static inline struct damon_region *damon_last_region(struct damon_target *t)
 #ifdef CONFIG_DAMON
 
 struct damon_region *damon_new_region(unsigned long start, unsigned long end);
-inline void damon_insert_region(struct damon_region *r,
+
+/*
+ * Add a region between two other regions
+ */
+static inline void damon_insert_region(struct damon_region *r,
 		struct damon_region *prev, struct damon_region *next,
-		struct damon_target *t);
+		struct damon_target *t)
+{
+	__list_add(&r->list, &prev->list, &next->list);
+	t->nr_regions++;
+}
+
 void damon_add_region(struct damon_region *r, struct damon_target *t);
 void damon_destroy_region(struct damon_region *r, struct damon_target *t);
 
-- 
cgit v1.2.3


From 76fd0285b447991267e838842c0be7395eb454bb Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 14 Jan 2022 14:10:50 -0800
Subject: mm/damon: hide kernel pointer from tracepoint event

DAMON's virtual address spaces monitoring primitive uses 'struct pid *'
of the target process as its monitoring target id.  The kernel address
is exposed as-is to the user space via the DAMON tracepoint,
'damon_aggregated'.

Though primarily only privileged users are allowed to access that, it
would be better to avoid unnecessarily exposing kernel pointers so.
Because the trace result is only required to be able to distinguish each
target, we aren't need to use the pointer as-is.

This makes the tracepoint to use the index of the target in the
context's targets list as its id in the tracepoint, to hide the kernel
space address.

Link: https://lkml.kernel.org/r/20211229131016.23641-5-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/trace/events/damon.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h
index 99ffa601e351..c79f1d4c39af 100644
--- a/include/trace/events/damon.h
+++ b/include/trace/events/damon.h
@@ -11,10 +11,10 @@
 
 TRACE_EVENT(damon_aggregated,
 
-	TP_PROTO(struct damon_target *t, struct damon_region *r,
-		unsigned int nr_regions),
+	TP_PROTO(struct damon_target *t, unsigned int target_id,
+		struct damon_region *r, unsigned int nr_regions),
 
-	TP_ARGS(t, r, nr_regions),
+	TP_ARGS(t, target_id, r, nr_regions),
 
 	TP_STRUCT__entry(
 		__field(unsigned long, target_id)
@@ -26,7 +26,7 @@ TRACE_EVENT(damon_aggregated,
 	),
 
 	TP_fast_assign(
-		__entry->target_id = t->id;
+		__entry->target_id = target_id;
 		__entry->nr_regions = nr_regions;
 		__entry->start = r->ar.start;
 		__entry->end = r->ar.end;
-- 
cgit v1.2.3


From dea2903719283c156b53741126228c4a1b40440f Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 10 Jan 2022 19:00:02 -0500
Subject: cifs: move superblock magic defitions to magic.h

Help userland apps to identify cifs and smb2 mounts.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 include/uapi/linux/magic.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 35687dcb1a42..a9f4dcb7b457 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -51,6 +51,7 @@
 #define QNX6_SUPER_MAGIC	0x68191122	/* qnx6 fs detection */
 #define AFS_FS_MAGIC		0x6B414653
 
+
 #define REISERFS_SUPER_MAGIC	0x52654973	/* used by gcc */
 					/* used by file system utilities that
 	                                   look at the superblock, etc.  */
@@ -59,6 +60,9 @@
 #define REISER2FS_JR_SUPER_MAGIC_STRING	"ReIsEr3Fs"
 
 #define SMB_SUPER_MAGIC		0x517B
+#define CIFS_SUPER_MAGIC	0xFF534D42      /* the first four bytes of SMB PDUs */
+#define SMB2_SUPER_MAGIC	0xFE534D42
+
 #define CGROUP_SUPER_MAGIC	0x27e0eb
 #define CGROUP2_SUPER_MAGIC	0x63677270
 
-- 
cgit v1.2.3


From b7ec62d7ee0f0b8af6ba190501dff7f9ee6545ca Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Sat, 14 Aug 2021 14:16:57 -0700
Subject: bitops: protect find_first_{,zero}_bit properly

find_first_bit() and find_first_zero_bit() are not protected with
ifdefs as other functions in find.h. It causes build errors on some
platforms if CONFIG_GENERIC_FIND_FIRST_BIT is enabled.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Fixes: 2cc7b6a44ac2 ("lib: add fast path for find_first_*_bit() and find_last_bit()")
Reported-by: kernel test robot <lkp@intel.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/asm-generic/bitops/find.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h
index 0d132ee2a291..835f959a25f2 100644
--- a/include/asm-generic/bitops/find.h
+++ b/include/asm-generic/bitops/find.h
@@ -97,6 +97,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
 
 #ifdef CONFIG_GENERIC_FIND_FIRST_BIT
 
+#ifndef find_first_bit
 /**
  * find_first_bit - find the first set bit in a memory region
  * @addr: The address to start the search at
@@ -116,7 +117,9 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
 
 	return _find_first_bit(addr, size);
 }
+#endif
 
+#ifndef find_first_zero_bit
 /**
  * find_first_zero_bit - find the first cleared bit in a memory region
  * @addr: The address to start the search at
@@ -136,6 +139,8 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
 
 	return _find_first_zero_bit(addr, size);
 }
+#endif
+
 #else /* CONFIG_GENERIC_FIND_FIRST_BIT */
 
 #ifndef find_first_bit
-- 
cgit v1.2.3


From 6b8ecb84f8f64017ae6e56cd745ad88e48f68779 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Sat, 14 Aug 2021 14:16:58 -0700
Subject: bitops: move find_bit_*_le functions from le.h to find.h

It's convenient to have all find_bit declarations in one place.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/asm-generic/bitops/find.h | 69 +++++++++++++++++++++++++++++++++++++++
 include/asm-generic/bitops/le.h   | 64 ------------------------------------
 2 files changed, 69 insertions(+), 64 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h
index 835f959a25f2..91b1b23f2b0c 100644
--- a/include/asm-generic/bitops/find.h
+++ b/include/asm-generic/bitops/find.h
@@ -190,4 +190,73 @@ extern unsigned long find_next_clump8(unsigned long *clump,
 #define find_first_clump8(clump, bits, size) \
 	find_next_clump8((clump), (bits), (size), 0)
 
+#if defined(__LITTLE_ENDIAN)
+
+static inline unsigned long find_next_zero_bit_le(const void *addr,
+		unsigned long size, unsigned long offset)
+{
+	return find_next_zero_bit(addr, size, offset);
+}
+
+static inline unsigned long find_next_bit_le(const void *addr,
+		unsigned long size, unsigned long offset)
+{
+	return find_next_bit(addr, size, offset);
+}
+
+static inline unsigned long find_first_zero_bit_le(const void *addr,
+		unsigned long size)
+{
+	return find_first_zero_bit(addr, size);
+}
+
+#elif defined(__BIG_ENDIAN)
+
+#ifndef find_next_zero_bit_le
+static inline
+unsigned long find_next_zero_bit_le(const void *addr, unsigned
+		long size, unsigned long offset)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val = *(const unsigned long *)addr;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = swab(val) | ~GENMASK(size - 1, offset);
+		return val == ~0UL ? size : ffz(val);
+	}
+
+	return _find_next_bit(addr, NULL, size, offset, ~0UL, 1);
+}
+#endif
+
+#ifndef find_next_bit_le
+static inline
+unsigned long find_next_bit_le(const void *addr, unsigned
+		long size, unsigned long offset)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val = *(const unsigned long *)addr;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = swab(val) & GENMASK(size - 1, offset);
+		return val ? __ffs(val) : size;
+	}
+
+	return _find_next_bit(addr, NULL, size, offset, 0UL, 1);
+}
+#endif
+
+#ifndef find_first_zero_bit_le
+#define find_first_zero_bit_le(addr, size) \
+	find_next_zero_bit_le((addr), (size), 0)
+#endif
+
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+
 #endif /*_ASM_GENERIC_BITOPS_FIND_H_ */
diff --git a/include/asm-generic/bitops/le.h b/include/asm-generic/bitops/le.h
index 5a28629cbf4d..d51beff60375 100644
--- a/include/asm-generic/bitops/le.h
+++ b/include/asm-generic/bitops/le.h
@@ -2,83 +2,19 @@
 #ifndef _ASM_GENERIC_BITOPS_LE_H_
 #define _ASM_GENERIC_BITOPS_LE_H_
 
-#include <asm-generic/bitops/find.h>
 #include <asm/types.h>
 #include <asm/byteorder.h>
-#include <linux/swab.h>
 
 #if defined(__LITTLE_ENDIAN)
 
 #define BITOP_LE_SWIZZLE	0
 
-static inline unsigned long find_next_zero_bit_le(const void *addr,
-		unsigned long size, unsigned long offset)
-{
-	return find_next_zero_bit(addr, size, offset);
-}
-
-static inline unsigned long find_next_bit_le(const void *addr,
-		unsigned long size, unsigned long offset)
-{
-	return find_next_bit(addr, size, offset);
-}
-
-static inline unsigned long find_first_zero_bit_le(const void *addr,
-		unsigned long size)
-{
-	return find_first_zero_bit(addr, size);
-}
-
 #elif defined(__BIG_ENDIAN)
 
 #define BITOP_LE_SWIZZLE	((BITS_PER_LONG-1) & ~0x7)
 
-#ifndef find_next_zero_bit_le
-static inline
-unsigned long find_next_zero_bit_le(const void *addr, unsigned
-		long size, unsigned long offset)
-{
-	if (small_const_nbits(size)) {
-		unsigned long val = *(const unsigned long *)addr;
-
-		if (unlikely(offset >= size))
-			return size;
-
-		val = swab(val) | ~GENMASK(size - 1, offset);
-		return val == ~0UL ? size : ffz(val);
-	}
-
-	return _find_next_bit(addr, NULL, size, offset, ~0UL, 1);
-}
-#endif
-
-#ifndef find_next_bit_le
-static inline
-unsigned long find_next_bit_le(const void *addr, unsigned
-		long size, unsigned long offset)
-{
-	if (small_const_nbits(size)) {
-		unsigned long val = *(const unsigned long *)addr;
-
-		if (unlikely(offset >= size))
-			return size;
-
-		val = swab(val) & GENMASK(size - 1, offset);
-		return val ? __ffs(val) : size;
-	}
-
-	return _find_next_bit(addr, NULL, size, offset, 0UL, 1);
-}
 #endif
 
-#ifndef find_first_zero_bit_le
-#define find_first_zero_bit_le(addr, size) \
-	find_next_zero_bit_le((addr), (size), 0)
-#endif
-
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
 
 static inline int test_bit_le(int nr, const void *addr)
 {
-- 
cgit v1.2.3


From 47d8c15615c0a2046d2d90b04cb80b81ddf31fb1 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Sat, 14 Aug 2021 14:16:59 -0700
Subject: include: move find.h from asm_generic to linux

find_bit API and bitmap API are closely related, but inclusion paths
are different - include/asm-generic and include/linux, correspondingly.
In the past it made a lot of troubles due to circular dependencies
and/or undefined symbols. Fix this by moving find.h under include/linux.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 include/asm-generic/bitops.h      |   1 -
 include/asm-generic/bitops/find.h | 262 -------------------------------------
 include/linux/bitmap.h            |   1 +
 include/linux/find.h              | 268 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 269 insertions(+), 263 deletions(-)
 delete mode 100644 include/asm-generic/bitops/find.h
 create mode 100644 include/linux/find.h

(limited to 'include')

diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h
index df9b5bc3d282..a47b8a71d6fe 100644
--- a/include/asm-generic/bitops.h
+++ b/include/asm-generic/bitops.h
@@ -20,7 +20,6 @@
 #include <asm-generic/bitops/fls.h>
 #include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
 
 #ifndef _LINUX_BITOPS_H
 #error only <linux/bitops.h> can be included directly
diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h
deleted file mode 100644
index 91b1b23f2b0c..000000000000
--- a/include/asm-generic/bitops/find.h
+++ /dev/null
@@ -1,262 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_GENERIC_BITOPS_FIND_H_
-#define _ASM_GENERIC_BITOPS_FIND_H_
-
-extern unsigned long _find_next_bit(const unsigned long *addr1,
-		const unsigned long *addr2, unsigned long nbits,
-		unsigned long start, unsigned long invert, unsigned long le);
-extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
-extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
-extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size);
-
-#ifndef find_next_bit
-/**
- * find_next_bit - find the next set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The bitmap size in bits
- *
- * Returns the bit number for the next set bit
- * If no bits are set, returns @size.
- */
-static inline
-unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
-			    unsigned long offset)
-{
-	if (small_const_nbits(size)) {
-		unsigned long val;
-
-		if (unlikely(offset >= size))
-			return size;
-
-		val = *addr & GENMASK(size - 1, offset);
-		return val ? __ffs(val) : size;
-	}
-
-	return _find_next_bit(addr, NULL, size, offset, 0UL, 0);
-}
-#endif
-
-#ifndef find_next_and_bit
-/**
- * find_next_and_bit - find the next set bit in both memory regions
- * @addr1: The first address to base the search on
- * @addr2: The second address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The bitmap size in bits
- *
- * Returns the bit number for the next set bit
- * If no bits are set, returns @size.
- */
-static inline
-unsigned long find_next_and_bit(const unsigned long *addr1,
-		const unsigned long *addr2, unsigned long size,
-		unsigned long offset)
-{
-	if (small_const_nbits(size)) {
-		unsigned long val;
-
-		if (unlikely(offset >= size))
-			return size;
-
-		val = *addr1 & *addr2 & GENMASK(size - 1, offset);
-		return val ? __ffs(val) : size;
-	}
-
-	return _find_next_bit(addr1, addr2, size, offset, 0UL, 0);
-}
-#endif
-
-#ifndef find_next_zero_bit
-/**
- * find_next_zero_bit - find the next cleared bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The bitmap size in bits
- *
- * Returns the bit number of the next zero bit
- * If no bits are zero, returns @size.
- */
-static inline
-unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
-				 unsigned long offset)
-{
-	if (small_const_nbits(size)) {
-		unsigned long val;
-
-		if (unlikely(offset >= size))
-			return size;
-
-		val = *addr | ~GENMASK(size - 1, offset);
-		return val == ~0UL ? size : ffz(val);
-	}
-
-	return _find_next_bit(addr, NULL, size, offset, ~0UL, 0);
-}
-#endif
-
-#ifdef CONFIG_GENERIC_FIND_FIRST_BIT
-
-#ifndef find_first_bit
-/**
- * find_first_bit - find the first set bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum number of bits to search
- *
- * Returns the bit number of the first set bit.
- * If no bits are set, returns @size.
- */
-static inline
-unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
-{
-	if (small_const_nbits(size)) {
-		unsigned long val = *addr & GENMASK(size - 1, 0);
-
-		return val ? __ffs(val) : size;
-	}
-
-	return _find_first_bit(addr, size);
-}
-#endif
-
-#ifndef find_first_zero_bit
-/**
- * find_first_zero_bit - find the first cleared bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum number of bits to search
- *
- * Returns the bit number of the first cleared bit.
- * If no bits are zero, returns @size.
- */
-static inline
-unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
-{
-	if (small_const_nbits(size)) {
-		unsigned long val = *addr | ~GENMASK(size - 1, 0);
-
-		return val == ~0UL ? size : ffz(val);
-	}
-
-	return _find_first_zero_bit(addr, size);
-}
-#endif
-
-#else /* CONFIG_GENERIC_FIND_FIRST_BIT */
-
-#ifndef find_first_bit
-#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
-#endif
-#ifndef find_first_zero_bit
-#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
-#endif
-
-#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
-
-#ifndef find_last_bit
-/**
- * find_last_bit - find the last set bit in a memory region
- * @addr: The address to start the search at
- * @size: The number of bits to search
- *
- * Returns the bit number of the last set bit, or size.
- */
-static inline
-unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
-{
-	if (small_const_nbits(size)) {
-		unsigned long val = *addr & GENMASK(size - 1, 0);
-
-		return val ? __fls(val) : size;
-	}
-
-	return _find_last_bit(addr, size);
-}
-#endif
-
-/**
- * find_next_clump8 - find next 8-bit clump with set bits in a memory region
- * @clump: location to store copy of found clump
- * @addr: address to base the search on
- * @size: bitmap size in number of bits
- * @offset: bit offset at which to start searching
- *
- * Returns the bit offset for the next set clump; the found clump value is
- * copied to the location pointed by @clump. If no bits are set, returns @size.
- */
-extern unsigned long find_next_clump8(unsigned long *clump,
-				      const unsigned long *addr,
-				      unsigned long size, unsigned long offset);
-
-#define find_first_clump8(clump, bits, size) \
-	find_next_clump8((clump), (bits), (size), 0)
-
-#if defined(__LITTLE_ENDIAN)
-
-static inline unsigned long find_next_zero_bit_le(const void *addr,
-		unsigned long size, unsigned long offset)
-{
-	return find_next_zero_bit(addr, size, offset);
-}
-
-static inline unsigned long find_next_bit_le(const void *addr,
-		unsigned long size, unsigned long offset)
-{
-	return find_next_bit(addr, size, offset);
-}
-
-static inline unsigned long find_first_zero_bit_le(const void *addr,
-		unsigned long size)
-{
-	return find_first_zero_bit(addr, size);
-}
-
-#elif defined(__BIG_ENDIAN)
-
-#ifndef find_next_zero_bit_le
-static inline
-unsigned long find_next_zero_bit_le(const void *addr, unsigned
-		long size, unsigned long offset)
-{
-	if (small_const_nbits(size)) {
-		unsigned long val = *(const unsigned long *)addr;
-
-		if (unlikely(offset >= size))
-			return size;
-
-		val = swab(val) | ~GENMASK(size - 1, offset);
-		return val == ~0UL ? size : ffz(val);
-	}
-
-	return _find_next_bit(addr, NULL, size, offset, ~0UL, 1);
-}
-#endif
-
-#ifndef find_next_bit_le
-static inline
-unsigned long find_next_bit_le(const void *addr, unsigned
-		long size, unsigned long offset)
-{
-	if (small_const_nbits(size)) {
-		unsigned long val = *(const unsigned long *)addr;
-
-		if (unlikely(offset >= size))
-			return size;
-
-		val = swab(val) & GENMASK(size - 1, offset);
-		return val ? __ffs(val) : size;
-	}
-
-	return _find_next_bit(addr, NULL, size, offset, 0UL, 1);
-}
-#endif
-
-#ifndef find_first_zero_bit_le
-#define find_first_zero_bit_le(addr, size) \
-	find_next_zero_bit_le((addr), (size), 0)
-#endif
-
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
-
-#endif /*_ASM_GENERIC_BITOPS_FIND_H_ */
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index a241dcf50f39..ead4a150bd7f 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -6,6 +6,7 @@
 
 #include <linux/align.h>
 #include <linux/bitops.h>
+#include <linux/find.h>
 #include <linux/limits.h>
 #include <linux/string.h>
 #include <linux/types.h>
diff --git a/include/linux/find.h b/include/linux/find.h
new file mode 100644
index 000000000000..c5410c243e04
--- /dev/null
+++ b/include/linux/find.h
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_FIND_H_
+#define __LINUX_FIND_H_
+
+#ifndef __LINUX_BITMAP_H
+#error only <linux/bitmap.h> can be included directly
+#endif
+
+#include <linux/bitops.h>
+
+extern unsigned long _find_next_bit(const unsigned long *addr1,
+		const unsigned long *addr2, unsigned long nbits,
+		unsigned long start, unsigned long invert, unsigned long le);
+extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size);
+
+#ifndef find_next_bit
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+			    unsigned long offset)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = *addr & GENMASK(size - 1, offset);
+		return val ? __ffs(val) : size;
+	}
+
+	return _find_next_bit(addr, NULL, size, offset, 0UL, 0);
+}
+#endif
+
+#ifndef find_next_and_bit
+/**
+ * find_next_and_bit - find the next set bit in both memory regions
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_next_and_bit(const unsigned long *addr1,
+		const unsigned long *addr2, unsigned long size,
+		unsigned long offset)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = *addr1 & *addr2 & GENMASK(size - 1, offset);
+		return val ? __ffs(val) : size;
+	}
+
+	return _find_next_bit(addr1, addr2, size, offset, 0UL, 0);
+}
+#endif
+
+#ifndef find_next_zero_bit
+/**
+ * find_next_zero_bit - find the next cleared bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number of the next zero bit
+ * If no bits are zero, returns @size.
+ */
+static inline
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+				 unsigned long offset)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = *addr | ~GENMASK(size - 1, offset);
+		return val == ~0UL ? size : ffz(val);
+	}
+
+	return _find_next_bit(addr, NULL, size, offset, ~0UL, 0);
+}
+#endif
+
+#ifdef CONFIG_GENERIC_FIND_FIRST_BIT
+
+#ifndef find_first_bit
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum number of bits to search
+ *
+ * Returns the bit number of the first set bit.
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val = *addr & GENMASK(size - 1, 0);
+
+		return val ? __ffs(val) : size;
+	}
+
+	return _find_first_bit(addr, size);
+}
+#endif
+
+#ifndef find_first_zero_bit
+/**
+ * find_first_zero_bit - find the first cleared bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum number of bits to search
+ *
+ * Returns the bit number of the first cleared bit.
+ * If no bits are zero, returns @size.
+ */
+static inline
+unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val = *addr | ~GENMASK(size - 1, 0);
+
+		return val == ~0UL ? size : ffz(val);
+	}
+
+	return _find_first_zero_bit(addr, size);
+}
+#endif
+
+#else /* CONFIG_GENERIC_FIND_FIRST_BIT */
+
+#ifndef find_first_bit
+#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
+#endif
+#ifndef find_first_zero_bit
+#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
+#endif
+
+#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
+
+#ifndef find_last_bit
+/**
+ * find_last_bit - find the last set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The number of bits to search
+ *
+ * Returns the bit number of the last set bit, or size.
+ */
+static inline
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val = *addr & GENMASK(size - 1, 0);
+
+		return val ? __fls(val) : size;
+	}
+
+	return _find_last_bit(addr, size);
+}
+#endif
+
+/**
+ * find_next_clump8 - find next 8-bit clump with set bits in a memory region
+ * @clump: location to store copy of found clump
+ * @addr: address to base the search on
+ * @size: bitmap size in number of bits
+ * @offset: bit offset at which to start searching
+ *
+ * Returns the bit offset for the next set clump; the found clump value is
+ * copied to the location pointed by @clump. If no bits are set, returns @size.
+ */
+extern unsigned long find_next_clump8(unsigned long *clump,
+				      const unsigned long *addr,
+				      unsigned long size, unsigned long offset);
+
+#define find_first_clump8(clump, bits, size) \
+	find_next_clump8((clump), (bits), (size), 0)
+
+#if defined(__LITTLE_ENDIAN)
+
+static inline unsigned long find_next_zero_bit_le(const void *addr,
+		unsigned long size, unsigned long offset)
+{
+	return find_next_zero_bit(addr, size, offset);
+}
+
+static inline unsigned long find_next_bit_le(const void *addr,
+		unsigned long size, unsigned long offset)
+{
+	return find_next_bit(addr, size, offset);
+}
+
+static inline unsigned long find_first_zero_bit_le(const void *addr,
+		unsigned long size)
+{
+	return find_first_zero_bit(addr, size);
+}
+
+#elif defined(__BIG_ENDIAN)
+
+#ifndef find_next_zero_bit_le
+static inline
+unsigned long find_next_zero_bit_le(const void *addr, unsigned
+		long size, unsigned long offset)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val = *(const unsigned long *)addr;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = swab(val) | ~GENMASK(size - 1, offset);
+		return val == ~0UL ? size : ffz(val);
+	}
+
+	return _find_next_bit(addr, NULL, size, offset, ~0UL, 1);
+}
+#endif
+
+#ifndef find_next_bit_le
+static inline
+unsigned long find_next_bit_le(const void *addr, unsigned
+		long size, unsigned long offset)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val = *(const unsigned long *)addr;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = swab(val) & GENMASK(size - 1, offset);
+		return val ? __ffs(val) : size;
+	}
+
+	return _find_next_bit(addr, NULL, size, offset, 0UL, 1);
+}
+#endif
+
+#ifndef find_first_zero_bit_le
+#define find_first_zero_bit_le(addr, size) \
+	find_next_zero_bit_le((addr), (size), 0)
+#endif
+
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+
+#endif /*__LINUX_FIND_H_ */
-- 
cgit v1.2.3


From c126a53c276048125b4a950072bab37ad0fea120 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Sat, 14 Aug 2021 14:17:00 -0700
Subject: arch: remove GENERIC_FIND_FIRST_BIT entirely

In 5.12 cycle we enabled GENERIC_FIND_FIRST_BIT config option for ARM64
and MIPS. It increased performance and shrunk .text size; and so far
I didn't receive any negative feedback on the change.

https://lore.kernel.org/linux-arch/20210225135700.1381396-1-yury.norov@gmail.com/

Now I think it's a good time to switch all architectures to use
find_{first,last}_bit() unconditionally, and so remove corresponding
config option.

The patch does't introduce functioal changes for arc, arm, arm64, mips,
m68k, s390 and x86, for other architectures I expect improvement both in
performance and .text size.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Tested-by: Alexander Lobakin <alobakin@pm.me> (mips)
Reviewed-by: Alexander Lobakin <alobakin@pm.me> (mips)
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Will Deacon <will@kernel.org>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/find.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/find.h b/include/linux/find.h
index c5410c243e04..ea57f7f38c49 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -101,8 +101,6 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
 }
 #endif
 
-#ifdef CONFIG_GENERIC_FIND_FIRST_BIT
-
 #ifndef find_first_bit
 /**
  * find_first_bit - find the first set bit in a memory region
@@ -147,17 +145,6 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
 }
 #endif
 
-#else /* CONFIG_GENERIC_FIND_FIRST_BIT */
-
-#ifndef find_first_bit
-#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
-#endif
-#ifndef find_first_zero_bit
-#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
-#endif
-
-#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
-
 #ifndef find_last_bit
 /**
  * find_last_bit - find the last set bit in a memory region
-- 
cgit v1.2.3


From f68edc9297bf3f7c94abb54b9b0b053607f7587b Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Sat, 14 Aug 2021 14:17:01 -0700
Subject: lib: add find_first_and_bit()

Currently find_first_and_bit() is an alias to find_next_and_bit(). However,
it is widely used in cpumask, so it worth to optimize it. This patch adds
its own implementation for find_first_and_bit().

On x86_64 find_bit_benchmark says:

Before (#define find_first_and_bit(...) find_next_and_bit(..., 0):
Start testing find_bit() with random-filled bitmap
[  140.291468] find_first_and_bit:           46890919 ns,  32671 iterations
Start testing find_bit() with sparse bitmap
[  140.295028] find_first_and_bit:               7103 ns,      1 iterations

After:
Start testing find_bit() with random-filled bitmap
[  162.574907] find_first_and_bit:           25045813 ns,  32846 iterations
Start testing find_bit() with sparse bitmap
[  162.578458] find_first_and_bit:               4900 ns,      1 iterations

(Thanks to Alexey Klimov for thorough testing.)

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Tested-by: Alexey Klimov <aklimov@redhat.com>
---
 include/linux/find.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include')

diff --git a/include/linux/find.h b/include/linux/find.h
index ea57f7f38c49..6048f8c97418 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -12,6 +12,8 @@ extern unsigned long _find_next_bit(const unsigned long *addr1,
 		const unsigned long *addr2, unsigned long nbits,
 		unsigned long start, unsigned long invert, unsigned long le);
 extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_first_and_bit(const unsigned long *addr1,
+					 const unsigned long *addr2, unsigned long size);
 extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
 extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size);
 
@@ -123,6 +125,31 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
 }
 #endif
 
+#ifndef find_first_and_bit
+/**
+ * find_first_and_bit - find the first set bit in both memory regions
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_first_and_bit(const unsigned long *addr1,
+				 const unsigned long *addr2,
+				 unsigned long size)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0);
+
+		return val ? __ffs(val) : size;
+	}
+
+	return _find_first_and_bit(addr1, addr2, size);
+}
+#endif
+
 #ifndef find_first_zero_bit
 /**
  * find_first_zero_bit - find the first cleared bit in a memory region
-- 
cgit v1.2.3


From 93ba139ba8190c33009c5353ca43c8519443f467 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Sat, 14 Aug 2021 14:17:02 -0700
Subject: cpumask: use find_first_and_bit()

Now we have an efficient implementation for find_first_and_bit(),
so switch cpumask to use it where appropriate.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/cpumask.h | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 1e7399fc69c0..c4e1b9ea0ba4 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -123,6 +123,12 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp)
 	return 0;
 }
 
+static inline unsigned int cpumask_first_and(const struct cpumask *srcp1,
+					     const struct cpumask *srcp2)
+{
+	return 0;
+}
+
 static inline unsigned int cpumask_last(const struct cpumask *srcp)
 {
 	return 0;
@@ -167,7 +173,7 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node)
 
 static inline int cpumask_any_and_distribute(const struct cpumask *src1p,
 					     const struct cpumask *src2p) {
-	return cpumask_next_and(-1, src1p, src2p);
+	return cpumask_first_and(src1p, src2p);
 }
 
 static inline int cpumask_any_distribute(const struct cpumask *srcp)
@@ -195,6 +201,19 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp)
 	return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits);
 }
 
+/**
+ * cpumask_first_and - return the first cpu from *srcp1 & *srcp2
+ * @src1p: the first input
+ * @src2p: the second input
+ *
+ * Returns >= nr_cpu_ids if no cpus set in both.  See also cpumask_next_and().
+ */
+static inline
+unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2)
+{
+	return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits);
+}
+
 /**
  * cpumask_last - get the last CPU in a cpumask
  * @srcp:	- the cpumask pointer
@@ -585,15 +604,6 @@ static inline void cpumask_copy(struct cpumask *dstp,
  */
 #define cpumask_any(srcp) cpumask_first(srcp)
 
-/**
- * cpumask_first_and - return the first cpu from *srcp1 & *srcp2
- * @src1p: the first input
- * @src2p: the second input
- *
- * Returns >= nr_cpu_ids if no cpus set in both.  See also cpumask_next_and().
- */
-#define cpumask_first_and(src1p, src2p) cpumask_next_and(-1, (src1p), (src2p))
-
 /**
  * cpumask_any_and - pick a "random" cpu from *mask1 & *mask2
  * @mask1: the first input cpumask
-- 
cgit v1.2.3


From 9b51d9d866482a703646fd4c07e433c3d9d88efd Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Sat, 14 Aug 2021 14:17:05 -0700
Subject: cpumask: replace cpumask_next_* with cpumask_first_* where
 appropriate

cpumask_first() is a more effective analogue of 'next' version if n == -1
(which means start == 0). This patch replaces 'next' with 'first' where
things look trivial.

There's no cpumask_first_zero() function, so create it.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/cpumask.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index c4e1b9ea0ba4..64dae70d31f5 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -123,6 +123,11 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp)
 	return 0;
 }
 
+static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
+{
+	return 0;
+}
+
 static inline unsigned int cpumask_first_and(const struct cpumask *srcp1,
 					     const struct cpumask *srcp2)
 {
@@ -201,6 +206,17 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp)
 	return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits);
 }
 
+/**
+ * cpumask_first_zero - get the first unset cpu in a cpumask
+ * @srcp: the cpumask pointer
+ *
+ * Returns >= nr_cpu_ids if all cpus are set.
+ */
+static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
+{
+	return find_first_zero_bit(cpumask_bits(srcp), nr_cpumask_bits);
+}
+
 /**
  * cpumask_first_and - return the first cpu from *srcp1 & *srcp2
  * @src1p: the first input
-- 
cgit v1.2.3


From bc9d6635c293a2ac30c6319f7cfd08860ab7948a Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Sat, 14 Aug 2021 14:17:06 -0700
Subject: include/linux: move for_each_bit() macros from bitops.h to find.h

for_each_bit() macros depend on find_bit() machinery, and so the
proper place for them is the find.h header.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/bitops.h | 34 ----------------------------------
 include/linux/find.h   | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 5e62e2383b7f..7aaed501f768 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -32,40 +32,6 @@ extern unsigned long __sw_hweight64(__u64 w);
  */
 #include <asm/bitops.h>
 
-#define for_each_set_bit(bit, addr, size) \
-	for ((bit) = find_first_bit((addr), (size));		\
-	     (bit) < (size);					\
-	     (bit) = find_next_bit((addr), (size), (bit) + 1))
-
-/* same as for_each_set_bit() but use bit as value to start with */
-#define for_each_set_bit_from(bit, addr, size) \
-	for ((bit) = find_next_bit((addr), (size), (bit));	\
-	     (bit) < (size);					\
-	     (bit) = find_next_bit((addr), (size), (bit) + 1))
-
-#define for_each_clear_bit(bit, addr, size) \
-	for ((bit) = find_first_zero_bit((addr), (size));	\
-	     (bit) < (size);					\
-	     (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
-
-/* same as for_each_clear_bit() but use bit as value to start with */
-#define for_each_clear_bit_from(bit, addr, size) \
-	for ((bit) = find_next_zero_bit((addr), (size), (bit));	\
-	     (bit) < (size);					\
-	     (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
-
-/**
- * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits
- * @start: bit offset to start search and to store the current iteration offset
- * @clump: location to store copy of current 8-bit clump
- * @bits: bitmap address to base the search on
- * @size: bitmap size in number of bits
- */
-#define for_each_set_clump8(start, clump, bits, size) \
-	for ((start) = find_first_clump8(&(clump), (bits), (size)); \
-	     (start) < (size); \
-	     (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8))
-
 static inline int get_bitmask_order(unsigned int count)
 {
 	int order;
diff --git a/include/linux/find.h b/include/linux/find.h
index 6048f8c97418..4500e8ab93e2 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -279,4 +279,38 @@ unsigned long find_next_bit_le(const void *addr, unsigned
 #error "Please fix <asm/byteorder.h>"
 #endif
 
+#define for_each_set_bit(bit, addr, size) \
+	for ((bit) = find_first_bit((addr), (size));		\
+	     (bit) < (size);					\
+	     (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_set_bit() but use bit as value to start with */
+#define for_each_set_bit_from(bit, addr, size) \
+	for ((bit) = find_next_bit((addr), (size), (bit));	\
+	     (bit) < (size);					\
+	     (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+#define for_each_clear_bit(bit, addr, size) \
+	for ((bit) = find_first_zero_bit((addr), (size));	\
+	     (bit) < (size);					\
+	     (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_clear_bit() but use bit as value to start with */
+#define for_each_clear_bit_from(bit, addr, size) \
+	for ((bit) = find_next_zero_bit((addr), (size), (bit));	\
+	     (bit) < (size);					\
+	     (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
+
+/**
+ * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits
+ * @start: bit offset to start search and to store the current iteration offset
+ * @clump: location to store copy of current 8-bit clump
+ * @bits: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_set_clump8(start, clump, bits, size) \
+	for ((start) = find_first_clump8(&(clump), (bits), (size)); \
+	     (start) < (size); \
+	     (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8))
+
 #endif /*__LINUX_FIND_H_ */
-- 
cgit v1.2.3


From 7516be9931b8bc8bcaac8531f490b42ab11ded1e Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Sat, 14 Aug 2021 14:17:07 -0700
Subject: find: micro-optimize for_each_{set,clear}_bit()

The macros iterate thru all set/clear bits in a bitmap. They search a
first bit using find_first_bit(), and the rest bits using find_next_bit().

Since find_next_bit() is called shortly after find_first_bit(), we can
save few lines of I-cache by not using find_first_bit().

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/find.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/find.h b/include/linux/find.h
index 4500e8ab93e2..ae9ed52b52b8 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -280,7 +280,7 @@ unsigned long find_next_bit_le(const void *addr, unsigned
 #endif
 
 #define for_each_set_bit(bit, addr, size) \
-	for ((bit) = find_first_bit((addr), (size));		\
+	for ((bit) = find_next_bit((addr), (size), 0);		\
 	     (bit) < (size);					\
 	     (bit) = find_next_bit((addr), (size), (bit) + 1))
 
@@ -291,7 +291,7 @@ unsigned long find_next_bit_le(const void *addr, unsigned
 	     (bit) = find_next_bit((addr), (size), (bit) + 1))
 
 #define for_each_clear_bit(bit, addr, size) \
-	for ((bit) = find_first_zero_bit((addr), (size));	\
+	for ((bit) = find_next_zero_bit((addr), (size), 0);	\
 	     (bit) < (size);					\
 	     (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
 
-- 
cgit v1.2.3


From ec288a2cf7ca40a939316b6df206ab845bb112d1 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Sat, 14 Aug 2021 14:17:11 -0700
Subject: bitmap: unify find_bit operations

bitmap_for_each_{set,clear}_region() are similar to for_each_bit()
macros in include/linux/find.h, but interface and implementation
of them are different.

This patch adds for_each_bitrange() macros and drops unused
bitmap_*_region() API in sake of unification.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Acked-by: Dennis Zhou <dennis@kernel.org>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # For MMC
---
 include/linux/bitmap.h | 33 -----------------------------
 include/linux/find.h   | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index ead4a150bd7f..7dba0847510c 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -55,12 +55,6 @@ struct device;
  *  bitmap_clear(dst, pos, nbits)               Clear specified bit area
  *  bitmap_find_next_zero_area(buf, len, pos, n, mask)  Find bit free area
  *  bitmap_find_next_zero_area_off(buf, len, pos, n, mask, mask_off)  as above
- *  bitmap_next_clear_region(map, &start, &end, nbits)  Find next clear region
- *  bitmap_next_set_region(map, &start, &end, nbits)  Find next set region
- *  bitmap_for_each_clear_region(map, rs, re, start, end)
- *  						Iterate over all clear regions
- *  bitmap_for_each_set_region(map, rs, re, start, end)
- *  						Iterate over all set regions
  *  bitmap_shift_right(dst, src, n, nbits)      *dst = *src >> n
  *  bitmap_shift_left(dst, src, n, nbits)       *dst = *src << n
  *  bitmap_cut(dst, src, first, n, nbits)       Cut n bits from first, copy rest
@@ -467,14 +461,6 @@ static inline void bitmap_replace(unsigned long *dst,
 		__bitmap_replace(dst, old, new, mask, nbits);
 }
 
-static inline void bitmap_next_clear_region(unsigned long *bitmap,
-					    unsigned int *rs, unsigned int *re,
-					    unsigned int end)
-{
-	*rs = find_next_zero_bit(bitmap, end, *rs);
-	*re = find_next_bit(bitmap, end, *rs + 1);
-}
-
 static inline void bitmap_next_set_region(unsigned long *bitmap,
 					  unsigned int *rs, unsigned int *re,
 					  unsigned int end)
@@ -483,25 +469,6 @@ static inline void bitmap_next_set_region(unsigned long *bitmap,
 	*re = find_next_zero_bit(bitmap, end, *rs + 1);
 }
 
-/*
- * Bitmap region iterators.  Iterates over the bitmap between [@start, @end).
- * @rs and @re should be integer variables and will be set to start and end
- * index of the current clear or set region.
- */
-#define bitmap_for_each_clear_region(bitmap, rs, re, start, end)	     \
-	for ((rs) = (start),						     \
-	     bitmap_next_clear_region((bitmap), &(rs), &(re), (end));	     \
-	     (rs) < (re);						     \
-	     (rs) = (re) + 1,						     \
-	     bitmap_next_clear_region((bitmap), &(rs), &(re), (end)))
-
-#define bitmap_for_each_set_region(bitmap, rs, re, start, end)		     \
-	for ((rs) = (start),						     \
-	     bitmap_next_set_region((bitmap), &(rs), &(re), (end));	     \
-	     (rs) < (re);						     \
-	     (rs) = (re) + 1,						     \
-	     bitmap_next_set_region((bitmap), &(rs), &(re), (end)))
-
 /**
  * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap.
  * @n: u64 value
diff --git a/include/linux/find.h b/include/linux/find.h
index ae9ed52b52b8..5bb6db213bcb 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -301,6 +301,62 @@ unsigned long find_next_bit_le(const void *addr, unsigned
 	     (bit) < (size);					\
 	     (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
 
+/**
+ * for_each_set_bitrange - iterate over all set bit ranges [b; e)
+ * @b: bit offset of start of current bitrange (first set bit)
+ * @e: bit offset of end of current bitrange (first unset bit)
+ * @addr: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_set_bitrange(b, e, addr, size)			\
+	for ((b) = find_next_bit((addr), (size), 0),		\
+	     (e) = find_next_zero_bit((addr), (size), (b) + 1);	\
+	     (b) < (size);					\
+	     (b) = find_next_bit((addr), (size), (e) + 1),	\
+	     (e) = find_next_zero_bit((addr), (size), (b) + 1))
+
+/**
+ * for_each_set_bitrange_from - iterate over all set bit ranges [b; e)
+ * @b: bit offset of start of current bitrange (first set bit); must be initialized
+ * @e: bit offset of end of current bitrange (first unset bit)
+ * @addr: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_set_bitrange_from(b, e, addr, size)		\
+	for ((b) = find_next_bit((addr), (size), (b)),		\
+	     (e) = find_next_zero_bit((addr), (size), (b) + 1);	\
+	     (b) < (size);					\
+	     (b) = find_next_bit((addr), (size), (e) + 1),	\
+	     (e) = find_next_zero_bit((addr), (size), (b) + 1))
+
+/**
+ * for_each_clear_bitrange - iterate over all unset bit ranges [b; e)
+ * @b: bit offset of start of current bitrange (first unset bit)
+ * @e: bit offset of end of current bitrange (first set bit)
+ * @addr: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_clear_bitrange(b, e, addr, size)		\
+	for ((b) = find_next_zero_bit((addr), (size), 0),	\
+	     (e) = find_next_bit((addr), (size), (b) + 1);	\
+	     (b) < (size);					\
+	     (b) = find_next_zero_bit((addr), (size), (e) + 1),	\
+	     (e) = find_next_bit((addr), (size), (b) + 1))
+
+/**
+ * for_each_clear_bitrange_from - iterate over all unset bit ranges [b; e)
+ * @b: bit offset of start of current bitrange (first set bit); must be initialized
+ * @e: bit offset of end of current bitrange (first unset bit)
+ * @addr: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_clear_bitrange_from(b, e, addr, size)		\
+	for ((b) = find_next_zero_bit((addr), (size), (b)),	\
+	     (e) = find_next_bit((addr), (size), (b) + 1);	\
+	     (b) < (size);					\
+	     (b) = find_next_zero_bit((addr), (size), (e) + 1),	\
+	     (e) = find_next_bit((addr), (size), (b) + 1))
+
 /**
  * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits
  * @start: bit offset to start search and to store the current iteration offset
-- 
cgit v1.2.3


From 7372971c1be5b7d4fdd8ad237798bdc1d1d54162 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 11 Jan 2022 10:19:22 +0300
Subject: rtc: mc146818-lib: fix signedness bug in mc146818_get_time()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The mc146818_get_time() function returns zero on success or negative
a error code on failure.  It needs to be type int.

Fixes: d35786b3a28d ("rtc: mc146818-lib: change return values of mc146818_get_time()")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20220111071922.GE11243@kili
---
 include/linux/mc146818rtc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h
index 67fb0a12becc..808bb4cee230 100644
--- a/include/linux/mc146818rtc.h
+++ b/include/linux/mc146818rtc.h
@@ -124,7 +124,7 @@ struct cmos_rtc_board_info {
 #endif /* ARCH_RTC_LOCATION */
 
 bool mc146818_does_rtc_work(void);
-unsigned int mc146818_get_time(struct rtc_time *time);
+int mc146818_get_time(struct rtc_time *time);
 int mc146818_set_time(struct rtc_time *time);
 
 bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param),
-- 
cgit v1.2.3


From 3fe7fa5843d204e235d92902190fecb972a3f9cc Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 10 Dec 2021 15:09:21 -0500
Subject: mm: Add folio_put_refs()

This is like folio_put(), but puts N references at once instead of
just one.  It's like put_page_refs(), but does one atomic operation
instead of two, and is available to more than just gup.c.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
---
 include/linux/mm.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c768a7c81b0b..cb98f75b245e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1244,6 +1244,26 @@ static inline void folio_put(struct folio *folio)
 		__put_page(&folio->page);
 }
 
+/**
+ * folio_put_refs - Reduce the reference count on a folio.
+ * @folio: The folio.
+ * @refs: The amount to subtract from the folio's reference count.
+ *
+ * If the folio's reference count reaches zero, the memory will be
+ * released back to the page allocator and may be used by another
+ * allocation immediately.  Do not access the memory or the struct folio
+ * after calling folio_put_refs() unless you can be sure that these weren't
+ * the last references.
+ *
+ * Context: May be called in process or interrupt context, but not in NMI
+ * context.  May be called while holding a spinlock.
+ */
+static inline void folio_put_refs(struct folio *folio, int refs)
+{
+	if (folio_ref_sub_and_test(folio, refs))
+		__put_page(&folio->page);
+}
+
 static inline void put_page(struct page *page)
 {
 	struct folio *folio = page_folio(page);
-- 
cgit v1.2.3


From a6097180d884ddab769fb25588ea8598589c218c Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 17 Jan 2022 09:07:26 +1100
Subject: devtmpfs regression fix: reconfigure on each mount

Prior to Linux v5.4 devtmpfs used mount_single() which treats the given
mount options as "remount" options, so it updates the configuration of
the single super_block on each mount.

Since that was changed, the mount options used for devtmpfs are ignored.
This is a regression which affect systemd - which mounts devtmpfs with
"-o mode=755,size=4m,nr_inodes=1m".

This patch restores the "remount" effect by calling reconfigure_single()

Fixes: d401727ea0d7 ("devtmpfs: don't mix {ramfs,shmem}_fill_super() with mount_single()")
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs_context.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 6b54982fc5f3..13fa6f3df8e4 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -142,6 +142,8 @@ extern void put_fs_context(struct fs_context *fc);
 extern int vfs_parse_fs_param_source(struct fs_context *fc,
 				     struct fs_parameter *param);
 extern void fc_drop_locked(struct fs_context *fc);
+int reconfigure_single(struct super_block *s,
+		       int flags, void *data);
 
 /*
  * sget() wrappers to be called from the ->get_tree() op.
-- 
cgit v1.2.3


From d8d83d8ab0a453e17e68b3a3bed1f940c34b8646 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 11 Jan 2022 14:37:41 +0100
Subject: lib/crypto: blake2s: move hmac construction into wireguard

Basically nobody should use blake2s in an HMAC construction; it already
has a keyed variant. But unfortunately for historical reasons, Noise,
used by WireGuard, uses HKDF quite strictly, which means we have to use
this. Because this really shouldn't be used by others, this commit moves
it into wireguard's noise.c locally, so that kernels that aren't using
WireGuard don't get this superfluous code baked in. On m68k systems,
this shaves off ~314 bytes.

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Tested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 include/crypto/blake2s.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h
index df3c6c2f9553..f9ffd39194eb 100644
--- a/include/crypto/blake2s.h
+++ b/include/crypto/blake2s.h
@@ -101,7 +101,4 @@ static inline void blake2s(u8 *out, const u8 *in, const u8 *key,
 	blake2s_final(&state, out);
 }
 
-void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen,
-		     const size_t keylen);
-
 #endif /* _CRYPTO_BLAKE2S_H */
-- 
cgit v1.2.3


From 90ed1e67e896cc8040a523f8428fc02f9b164394 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 12 Jan 2022 17:18:08 +0100
Subject: random: rather than entropy_store abstraction, use global

Originally, the RNG used several pools, so having things abstracted out
over a generic entropy_store object made sense. These days, there's only
one input pool, and then an uneven mix of usage via the abstraction and
usage via &input_pool. Rather than this uneasy mixture, just get rid of
the abstraction entirely and have things always use the global. This
simplifies the code and makes reading it a bit easier.

Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 include/trace/events/random.h | 56 ++++++++++++++++---------------------------
 1 file changed, 21 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/random.h b/include/trace/events/random.h
index 3d7b432ca5f3..a2d9aa16a5d7 100644
--- a/include/trace/events/random.h
+++ b/include/trace/events/random.h
@@ -28,80 +28,71 @@ TRACE_EVENT(add_device_randomness,
 );
 
 DECLARE_EVENT_CLASS(random__mix_pool_bytes,
-	TP_PROTO(const char *pool_name, int bytes, unsigned long IP),
+	TP_PROTO(int bytes, unsigned long IP),
 
-	TP_ARGS(pool_name, bytes, IP),
+	TP_ARGS(bytes, IP),
 
 	TP_STRUCT__entry(
-		__field( const char *,	pool_name		)
 		__field(	  int,	bytes			)
 		__field(unsigned long,	IP			)
 	),
 
 	TP_fast_assign(
-		__entry->pool_name	= pool_name;
 		__entry->bytes		= bytes;
 		__entry->IP		= IP;
 	),
 
-	TP_printk("%s pool: bytes %d caller %pS",
-		  __entry->pool_name, __entry->bytes, (void *)__entry->IP)
+	TP_printk("input pool: bytes %d caller %pS",
+		  __entry->bytes, (void *)__entry->IP)
 );
 
 DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes,
-	TP_PROTO(const char *pool_name, int bytes, unsigned long IP),
+	TP_PROTO(int bytes, unsigned long IP),
 
-	TP_ARGS(pool_name, bytes, IP)
+	TP_ARGS(bytes, IP)
 );
 
 DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock,
-	TP_PROTO(const char *pool_name, int bytes, unsigned long IP),
+	TP_PROTO(int bytes, unsigned long IP),
 
-	TP_ARGS(pool_name, bytes, IP)
+	TP_ARGS(bytes, IP)
 );
 
 TRACE_EVENT(credit_entropy_bits,
-	TP_PROTO(const char *pool_name, int bits, int entropy_count,
-		 unsigned long IP),
+	TP_PROTO(int bits, int entropy_count, unsigned long IP),
 
-	TP_ARGS(pool_name, bits, entropy_count, IP),
+	TP_ARGS(bits, entropy_count, IP),
 
 	TP_STRUCT__entry(
-		__field( const char *,	pool_name		)
 		__field(	  int,	bits			)
 		__field(	  int,	entropy_count		)
 		__field(unsigned long,	IP			)
 	),
 
 	TP_fast_assign(
-		__entry->pool_name	= pool_name;
 		__entry->bits		= bits;
 		__entry->entropy_count	= entropy_count;
 		__entry->IP		= IP;
 	),
 
-	TP_printk("%s pool: bits %d entropy_count %d caller %pS",
-		  __entry->pool_name, __entry->bits,
-		  __entry->entropy_count, (void *)__entry->IP)
+	TP_printk("input pool: bits %d entropy_count %d caller %pS",
+		  __entry->bits, __entry->entropy_count, (void *)__entry->IP)
 );
 
 TRACE_EVENT(debit_entropy,
-	TP_PROTO(const char *pool_name, int debit_bits),
+	TP_PROTO(int debit_bits),
 
-	TP_ARGS(pool_name, debit_bits),
+	TP_ARGS( debit_bits),
 
 	TP_STRUCT__entry(
-		__field( const char *,	pool_name		)
 		__field(	  int,	debit_bits		)
 	),
 
 	TP_fast_assign(
-		__entry->pool_name	= pool_name;
 		__entry->debit_bits	= debit_bits;
 	),
 
-	TP_printk("%s: debit_bits %d", __entry->pool_name,
-		  __entry->debit_bits)
+	TP_printk("input pool: debit_bits %d", __entry->debit_bits)
 );
 
 TRACE_EVENT(add_input_randomness,
@@ -170,36 +161,31 @@ DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch,
 );
 
 DECLARE_EVENT_CLASS(random__extract_entropy,
-	TP_PROTO(const char *pool_name, int nbytes, int entropy_count,
-		 unsigned long IP),
+	TP_PROTO(int nbytes, int entropy_count, unsigned long IP),
 
-	TP_ARGS(pool_name, nbytes, entropy_count, IP),
+	TP_ARGS(nbytes, entropy_count, IP),
 
 	TP_STRUCT__entry(
-		__field( const char *,	pool_name		)
 		__field(	  int,	nbytes			)
 		__field(	  int,	entropy_count		)
 		__field(unsigned long,	IP			)
 	),
 
 	TP_fast_assign(
-		__entry->pool_name	= pool_name;
 		__entry->nbytes		= nbytes;
 		__entry->entropy_count	= entropy_count;
 		__entry->IP		= IP;
 	),
 
-	TP_printk("%s pool: nbytes %d entropy_count %d caller %pS",
-		  __entry->pool_name, __entry->nbytes, __entry->entropy_count,
-		  (void *)__entry->IP)
+	TP_printk("input pool: nbytes %d entropy_count %d caller %pS",
+		  __entry->nbytes, __entry->entropy_count, (void *)__entry->IP)
 );
 
 
 DEFINE_EVENT(random__extract_entropy, extract_entropy,
-	TP_PROTO(const char *pool_name, int nbytes, int entropy_count,
-		 unsigned long IP),
+	TP_PROTO(int nbytes, int entropy_count, unsigned long IP),
 
-	TP_ARGS(pool_name, nbytes, entropy_count, IP)
+	TP_ARGS(nbytes, entropy_count, IP)
 );
 
 TRACE_EVENT(urandom_read,
-- 
cgit v1.2.3


From be80a1d3f9dbe5aee79a325964f7037fe2d92f30 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 10 Jan 2022 14:05:49 +0000
Subject: bpf: Generalize check_ctx_reg for reuse with other types

Generalize the check_ctx_reg() helper function into a more generic named one
so that it can be reused for other register types as well to check whether
their offset is non-zero. No functional change.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 143401d4c9d9..e9993172f892 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -519,8 +519,8 @@ bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off,
 void
 bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
 
-int check_ctx_reg(struct bpf_verifier_env *env,
-		  const struct bpf_reg_state *reg, int regno);
+int check_ptr_off_reg(struct bpf_verifier_env *env,
+		      const struct bpf_reg_state *reg, int regno);
 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
 		   u32 regno, u32 mem_size);
 
-- 
cgit v1.2.3


From a672b2e36a648afb04ad3bda93b6bda947a479a5 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 13 Jan 2022 11:11:30 +0000
Subject: bpf: Fix ringbuf memory type confusion when passing to helpers

The bpf_ringbuf_submit() and bpf_ringbuf_discard() have ARG_PTR_TO_ALLOC_MEM
in their bpf_func_proto definition as their first argument, and thus both expect
the result from a prior bpf_ringbuf_reserve() call which has a return type of
RET_PTR_TO_ALLOC_MEM_OR_NULL.

While the non-NULL memory from bpf_ringbuf_reserve() can be passed to other
helpers, the two sinks (bpf_ringbuf_submit(), bpf_ringbuf_discard()) right now
only enforce a register type of PTR_TO_MEM.

This can lead to potential type confusion since it would allow other PTR_TO_MEM
memory to be passed into the two sinks which did not come from bpf_ringbuf_reserve().

Add a new MEM_ALLOC composable type attribute for PTR_TO_MEM, and enforce that:

 - bpf_ringbuf_reserve() returns NULL or PTR_TO_MEM | MEM_ALLOC
 - bpf_ringbuf_submit() and bpf_ringbuf_discard() only take PTR_TO_MEM | MEM_ALLOC
   but not plain PTR_TO_MEM arguments via ARG_PTR_TO_ALLOC_MEM
 - however, other helpers might treat PTR_TO_MEM | MEM_ALLOC as plain PTR_TO_MEM
   to populate the memory area when they use ARG_PTR_TO_{UNINIT_,}MEM in their
   func proto description

Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it")
Reported-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6e947cd91152..fa517ae604ad 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -316,7 +316,12 @@ enum bpf_type_flag {
 	 */
 	MEM_RDONLY		= BIT(1 + BPF_BASE_TYPE_BITS),
 
-	__BPF_TYPE_LAST_FLAG	= MEM_RDONLY,
+	/* MEM was "allocated" from a different helper, and cannot be mixed
+	 * with regular non-MEM_ALLOC'ed MEM types.
+	 */
+	MEM_ALLOC		= BIT(2 + BPF_BASE_TYPE_BITS),
+
+	__BPF_TYPE_LAST_FLAG	= MEM_ALLOC,
 };
 
 /* Max number of base types. */
@@ -400,7 +405,7 @@ enum bpf_return_type {
 	RET_PTR_TO_SOCKET_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_SOCKET,
 	RET_PTR_TO_TCP_SOCK_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK,
 	RET_PTR_TO_SOCK_COMMON_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON,
-	RET_PTR_TO_ALLOC_MEM_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM,
+	RET_PTR_TO_ALLOC_MEM_OR_NULL	= PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM,
 	RET_PTR_TO_BTF_ID_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,
 
 	/* This must be the last entry. Its purpose is to ensure the enum is
-- 
cgit v1.2.3


From e6eec09b7bc7869a49ac0ff376415bad40030ade Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 8 Dec 2021 01:52:15 +0000
Subject: KVM: Drop unused kvm_vcpu.pre_pcpu field

Remove kvm_vcpu.pre_pcpu as it no longer has any users.  No functional
change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20211208015236.1616697-6-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3c47b146851a..5c3c67b6318f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -309,7 +309,6 @@ struct kvm_vcpu {
 	u64 requests;
 	unsigned long guest_debug;
 
-	int pre_pcpu;
 	struct list_head blocked_vcpu_list;
 
 	struct mutex mutex;
-- 
cgit v1.2.3


From 12a8eee5686ef3ea7d8db90cd664f11e4a39e349 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 8 Dec 2021 01:52:16 +0000
Subject: KVM: Move x86 VMX's posted interrupt list_head to vcpu_vmx

Move the seemingly generic block_vcpu_list from kvm_vcpu to vcpu_vmx, and
rename the list and all associated variables to clarify that it tracks
the set of vCPU that need to be poked on a posted interrupt to the wakeup
vector.  The list is not used to track _all_ vCPUs that are blocking, and
the term "blocked" can be misleading as it may refer to a blocking
condition in the host or the guest, where as the PI wakeup case is
specifically for the vCPUs that are actively blocking from within the
guest.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20211208015236.1616697-7-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5c3c67b6318f..f079820f52b5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -309,8 +309,6 @@ struct kvm_vcpu {
 	u64 requests;
 	unsigned long guest_debug;
 
-	struct list_head blocked_vcpu_list;
-
 	struct mutex mutex;
 	struct kvm_run *run;
 
-- 
cgit v1.2.3


From 60639f74c2f4fcc3ffa2ac0b120eaa874ccc713f Mon Sep 17 00:00:00 2001
From: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Date: Mon, 6 Dec 2021 11:46:50 +0100
Subject: asm-generic: Prepare for riscv use of pud_alloc_one and pud_free

In the following commits, riscv will almost use the generic versions of
pud_alloc_one and pud_free but an additional check is required since those
functions are only relevant when using at least a 4-level page table, which
will be determined at runtime on riscv.

So move the content of those functions into other functions that riscv
can use without duplicating code.

Signed-off-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 include/asm-generic/pgalloc.h | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 02932efad3ab..977bea16cf1b 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -147,6 +147,15 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #if CONFIG_PGTABLE_LEVELS > 3
 
+static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	gfp_t gfp = GFP_PGTABLE_USER;
+
+	if (mm == &init_mm)
+		gfp = GFP_PGTABLE_KERNEL;
+	return (pud_t *)get_zeroed_page(gfp);
+}
+
 #ifndef __HAVE_ARCH_PUD_ALLOC_ONE
 /**
  * pud_alloc_one - allocate a page for PUD-level page table
@@ -159,20 +168,23 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
  */
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	gfp_t gfp = GFP_PGTABLE_USER;
-
-	if (mm == &init_mm)
-		gfp = GFP_PGTABLE_KERNEL;
-	return (pud_t *)get_zeroed_page(gfp);
+	return __pud_alloc_one(mm, addr);
 }
 #endif
 
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+static inline void __pud_free(struct mm_struct *mm, pud_t *pud)
 {
 	BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
 	free_page((unsigned long)pud);
 }
 
+#ifndef __HAVE_ARCH_PUD_FREE
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	__pud_free(mm, pud);
+}
+#endif
+
 #endif /* CONFIG_PGTABLE_LEVELS > 3 */
 
 #ifndef __HAVE_ARCH_PGD_FREE
-- 
cgit v1.2.3


From 1ca3fb3abd2b615c4b61728de545760a6e2c2d8b Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Wed, 19 Jan 2022 18:07:45 -0800
Subject: mm: percpu: add pcpu_fc_cpu_to_node_fn_t typedef

Add pcpu_fc_cpu_to_node_fn_t and pass it into pcpu_fc_alloc_fn_t, pcpu
first chunk allocation will call it to alloc memblock on the
corresponding node by it, this is prepare for the next patch.

Link: https://lkml.kernel.org/r/20211216112359.103822-3-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/percpu.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index ae4004e7957e..e4078bf45fd5 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -94,8 +94,9 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
 
 extern enum pcpu_fc pcpu_chosen_fc;
 
-typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
-				     size_t align);
+typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
+typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align,
+				     pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
 typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
 typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
 typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
@@ -111,12 +112,14 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 				size_t atom_size,
 				pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
+				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
 				pcpu_fc_alloc_fn_t alloc_fn,
 				pcpu_fc_free_fn_t free_fn);
 #endif
 
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
 extern int __init pcpu_page_first_chunk(size_t reserved_size,
+				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
 				pcpu_fc_alloc_fn_t alloc_fn,
 				pcpu_fc_free_fn_t free_fn,
 				pcpu_fc_populate_pte_fn_t populate_pte_fn);
-- 
cgit v1.2.3


From 23f917169ef157aa7a6bf80d8c4aad6f1282852c Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Wed, 19 Jan 2022 18:07:49 -0800
Subject: mm: percpu: add generic pcpu_fc_alloc/free funciton

With the previous patch, we could add a generic pcpu first chunk
allocate and free function to cleanup the duplicated definations on each
architecture.

Link: https://lkml.kernel.org/r/20211216112359.103822-4-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/percpu.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index e4078bf45fd5..d73c97ef4ff4 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -95,9 +95,6 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
 extern enum pcpu_fc pcpu_chosen_fc;
 
 typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
-typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align,
-				     pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
-typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
 typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
 typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
 
@@ -112,16 +109,12 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 				size_t atom_size,
 				pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
-				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
-				pcpu_fc_alloc_fn_t alloc_fn,
-				pcpu_fc_free_fn_t free_fn);
+				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
 #endif
 
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
 extern int __init pcpu_page_first_chunk(size_t reserved_size,
 				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
-				pcpu_fc_alloc_fn_t alloc_fn,
-				pcpu_fc_free_fn_t free_fn,
 				pcpu_fc_populate_pte_fn_t populate_pte_fn);
 #endif
 
-- 
cgit v1.2.3


From 20c035764626c56c4f6514936b9ee4be0f4cd962 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Wed, 19 Jan 2022 18:07:53 -0800
Subject: mm: percpu: add generic pcpu_populate_pte() function

With NEED_PER_CPU_PAGE_FIRST_CHUNK enabled, we need a function to
populate pte, this patch adds a generic pcpu populate pte function,
pcpu_populate_pte(), which is marked __weak and used on most
architectures, but it is overridden on x86, which has its own
implementation.

Link: https://lkml.kernel.org/r/20211216112359.103822-5-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/percpu.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index d73c97ef4ff4..f1ec5ad1351c 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -95,7 +95,6 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
 extern enum pcpu_fc pcpu_chosen_fc;
 
 typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
-typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
 typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
 
 extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
@@ -113,9 +112,9 @@ extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 #endif
 
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+void __init pcpu_populate_pte(unsigned long addr);
 extern int __init pcpu_page_first_chunk(size_t reserved_size,
-				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
-				pcpu_fc_populate_pte_fn_t populate_pte_fn);
+				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
 #endif
 
 extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1);
-- 
cgit v1.2.3


From ae62fbe299629d3b2fa61d4cf5146258c4d99fdf Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 19 Jan 2022 18:08:00 -0800
Subject: proc: make the proc_create[_data]() stubs static inlines

Change the proc_create[_data]() stubs which are used when CONFIG_PROC_FS
is not set from #defines to a static inline stubs.

This should fix clang -Werror builds failing due to errors like this:

  drivers/platform/x86/thinkpad_acpi.c:918:30: error: unused variable
   'dispatch_proc_ops' [-Werror,-Wunused-const-variable]

Fixing this in include/linux/proc_fs.h should ensure that the same issue
is also fixed in any other drivers hitting the same -Werror issue.

[akpm@linux-foundation.org: fix CONFIG_PROC_FS=n]
[akpm@linux-foundation.org: fix arch/sparc/kernel/led.c]
[akpm@linux-foundation.org: fix build]

Link: https://lkml.kernel.org/r/20211116131112.508304-1-hdegoede@redhat.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reported-by: kernel test robot <lkp@intel.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/proc_fs.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 069c7fd95396..01b9268451a8 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -178,8 +178,16 @@ static inline struct proc_dir_entry *proc_mkdir_mode(const char *name,
 #define proc_create_seq(name, mode, parent, ops) ({NULL;})
 #define proc_create_single(name, mode, parent, show) ({NULL;})
 #define proc_create_single_data(name, mode, parent, show, data) ({NULL;})
-#define proc_create(name, mode, parent, proc_ops) ({NULL;})
-#define proc_create_data(name, mode, parent, proc_ops, data) ({NULL;})
+
+static inline struct proc_dir_entry *
+proc_create(const char *name, umode_t mode, struct proc_dir_entry *parent,
+	    const struct proc_ops *proc_ops)
+{ return NULL; }
+
+static inline struct proc_dir_entry *
+proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent,
+		 const struct proc_ops *proc_ops, void *data)
+{ return NULL; }
 
 static inline void proc_set_size(struct proc_dir_entry *de, loff_t size) {}
 static inline void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid) {}
-- 
cgit v1.2.3


From 22c033989c3eb9731ad0c497dfab4231b8e367d6 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 19 Jan 2022 18:08:12 -0800
Subject: include/linux/unaligned: replace kernel.h with the necessary
 inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

The rest of the changes are induced by the above and may not be split.

Link: https://lkml.kernel.org/r/20211209123823.20425-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>	[brcmfmac]
Acked-by: Kalle Valo <kvalo@kernel.org>
Cc: Arend van Spriel <aspriel@gmail.com>
Cc: Franky Lin <franky.lin@broadcom.com>
Cc: Hante Meuleman <hante.meuleman@broadcom.com>
Cc: Chi-hsien Lin <chi-hsien.lin@infineon.com>
Cc: Wright Feng <wright.feng@infineon.com>
Cc: Chung-hsien Hsu <chung-hsien.hsu@infineon.com>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/unaligned/packed_struct.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/unaligned/packed_struct.h b/include/linux/unaligned/packed_struct.h
index c0d817de4df2..f4c8eaf4d012 100644
--- a/include/linux/unaligned/packed_struct.h
+++ b/include/linux/unaligned/packed_struct.h
@@ -1,7 +1,7 @@
 #ifndef _LINUX_UNALIGNED_PACKED_STRUCT_H
 #define _LINUX_UNALIGNED_PACKED_STRUCT_H
 
-#include <linux/kernel.h>
+#include <linux/types.h>
 
 struct __una_u16 { u16 x; } __packed;
 struct __una_u32 { u32 x; } __packed;
-- 
cgit v1.2.3


From 40cbf09f060c8febef64541c463d4dd526abe445 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 19 Jan 2022 18:08:16 -0800
Subject: kernel.h: include a note to discourage people from including it in
 headers

Include a note at the top to discourage people from including it in
headers.

Link: https://lkml.kernel.org/r/20211209150803.4473-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 77755ac3e189..36a612d82956 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -1,4 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NOTE:
+ *
+ * This header has combined a lot of unrelated to each other stuff.
+ * The process of splitting its content is in progress while keeping
+ * backward compatibility. That's why it's highly recommended NOT to
+ * include this header inside another header file, especially under
+ * generic or architectural include/ directory.
+ */
 #ifndef _LINUX_KERNEL_H
 #define _LINUX_KERNEL_H
 
-- 
cgit v1.2.3


From 95af469c4f609de011debc08e7a35b45201623a8 Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Wed, 19 Jan 2022 18:08:29 -0800
Subject: fs/binfmt_elf: replace open-coded string copy with get_task_comm

It is better to use get_task_comm() instead of the open coded string
copy as we do in other places.

struct elf_prpsinfo is used to dump the task information in userspace
coredump or kernel vmcore.  Below is the verification of vmcore,

  crash> ps
     PID    PPID  CPU       TASK        ST  %MEM     VSZ    RSS  COMM
        0      0   0  ffffffff9d21a940  RU   0.0       0      0  [swapper/0]
  >     0      0   1  ffffa09e40f85e80  RU   0.0       0      0  [swapper/1]
  >     0      0   2  ffffa09e40f81f80  RU   0.0       0      0  [swapper/2]
  >     0      0   3  ffffa09e40f83f00  RU   0.0       0      0  [swapper/3]
  >     0      0   4  ffffa09e40f80000  RU   0.0       0      0  [swapper/4]
  >     0      0   5  ffffa09e40f89f80  RU   0.0       0      0  [swapper/5]
        0      0   6  ffffa09e40f8bf00  RU   0.0       0      0  [swapper/6]
  >     0      0   7  ffffa09e40f88000  RU   0.0       0      0  [swapper/7]
  >     0      0   8  ffffa09e40f8de80  RU   0.0       0      0  [swapper/8]
  >     0      0   9  ffffa09e40f95e80  RU   0.0       0      0  [swapper/9]
  >     0      0  10  ffffa09e40f91f80  RU   0.0       0      0  [swapper/10]
  >     0      0  11  ffffa09e40f93f00  RU   0.0       0      0  [swapper/11]
  >     0      0  12  ffffa09e40f90000  RU   0.0       0      0  [swapper/12]
  >     0      0  13  ffffa09e40f9bf00  RU   0.0       0      0  [swapper/13]
  >     0      0  14  ffffa09e40f98000  RU   0.0       0      0  [swapper/14]
  >     0      0  15  ffffa09e40f9de80  RU   0.0       0      0  [swapper/15]

It works well as expected.

Some comments are added to explain why we use the hard-coded 16.

Link: https://lkml.kernel.org/r/20211120112738.45980-5-laoar.shao@gmail.com
Suggested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Arnaldo Carvalho de Melo <arnaldo.melo@gmail.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Michal Miroslaw <mirq-linux@rere.qmqm.pl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Kees Cook <keescook@chromium.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/elfcore-compat.h | 5 +++++
 include/linux/elfcore.h        | 5 +++++
 2 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/elfcore-compat.h b/include/linux/elfcore-compat.h
index e272c3d452ce..54feb64e9b5d 100644
--- a/include/linux/elfcore-compat.h
+++ b/include/linux/elfcore-compat.h
@@ -43,6 +43,11 @@ struct compat_elf_prpsinfo
 	__compat_uid_t			pr_uid;
 	__compat_gid_t			pr_gid;
 	compat_pid_t			pr_pid, pr_ppid, pr_pgrp, pr_sid;
+	/*
+	 * The hard-coded 16 is derived from TASK_COMM_LEN, but it can't be
+	 * changed as it is exposed to userspace. We'd better make it hard-coded
+	 * here.
+	 */
 	char				pr_fname[16];
 	char				pr_psargs[ELF_PRARGSZ];
 };
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 957ebec35aad..746e081879a5 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -65,6 +65,11 @@ struct elf_prpsinfo
 	__kernel_gid_t	pr_gid;
 	pid_t	pr_pid, pr_ppid, pr_pgrp, pr_sid;
 	/* Lots missing */
+	/*
+	 * The hard-coded 16 is derived from TASK_COMM_LEN, but it can't be
+	 * changed as it is exposed to userspace. We'd better make it hard-coded
+	 * here.
+	 */
 	char	pr_fname[16];	/* filename of executable */
 	char	pr_psargs[ELF_PRARGSZ];	/* initial part of arg list */
 };
-- 
cgit v1.2.3


From 3087c61ed2c48548b74dd343a5209b87082c682d Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Wed, 19 Jan 2022 18:08:40 -0800
Subject: tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN

As the sched:sched_switch tracepoint args are derived from the kernel,
we'd better make it same with the kernel.  So the macro TASK_COMM_LEN is
converted to type enum, then all the BPF programs can get it through
BTF.

The BPF program which wants to use TASK_COMM_LEN should include the
header vmlinux.h.  Regarding the test_stacktrace_map and
test_tracepoint, as the type defined in linux/bpf.h are also defined in
vmlinux.h, so we don't need to include linux/bpf.h again.

Link: https://lkml.kernel.org/r/20211120112738.45980-8-laoar.shao@gmail.com
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Arnaldo Carvalho de Melo <arnaldo.melo@gmail.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Michal Miroslaw <mirq-linux@rere.qmqm.pl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Kees Cook <keescook@chromium.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78c351e35fec..cecd4806edc6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -274,8 +274,13 @@ struct task_group;
 
 #define get_current_state()	READ_ONCE(current->__state)
 
-/* Task command name length: */
-#define TASK_COMM_LEN			16
+/*
+ * Define the task command name length as enum, then it can be visible to
+ * BPF programs.
+ */
+enum {
+	TASK_COMM_LEN = 16,
+};
 
 extern void scheduler_tick(void);
 
-- 
cgit v1.2.3


From d6986ce24fc00b0638bd29efe8fb7ba7619ed2aa Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Wed, 19 Jan 2022 18:08:43 -0800
Subject: kthread: dynamically allocate memory to store kthread's full name

When I was implementing a new per-cpu kthread cfs_migration, I found the
comm of it "cfs_migration/%u" is truncated due to the limitation of
TASK_COMM_LEN.  For example, the comm of the percpu thread on CPU10~19
all have the same name "cfs_migration/1", which will confuse the user.
This issue is not critical, because we can get the corresponding CPU
from the task's Cpus_allowed.  But for kthreads corresponding to other
hardware devices, it is not easy to get the detailed device info from
task comm, for example,

    jbd2/nvme0n1p2-
    xfs-reclaim/sdf

Currently there are so many truncated kthreads:

    rcu_tasks_kthre
    rcu_tasks_rude_
    rcu_tasks_trace
    poll_mpt3sas0_s
    ext4-rsv-conver
    xfs-reclaim/sd{a, b, c, ...}
    xfs-blockgc/sd{a, b, c, ...}
    xfs-inodegc/sd{a, b, c, ...}
    audit_send_repl
    ecryptfs-kthrea
    vfio-irqfd-clea
    jbd2/nvme0n1p2-
    ...

We can shorten these names to work around this problem, but it may be
not applied to all of the truncated kthreads.  Take 'jbd2/nvme0n1p2-'
for example, it is a nice name, and it is not a good idea to shorten it.

One possible way to fix this issue is extending the task comm size, but
as task->comm is used in lots of places, that may cause some potential
buffer overflows.  Another more conservative approach is introducing a
new pointer to store kthread's full name if it is truncated, which won't
introduce too much overhead as it is in the non-critical path.  Finally
we make a dicision to use the second approach.  See also the discussions
in this thread:
https://lore.kernel.org/lkml/20211101060419.4682-1-laoar.shao@gmail.com/

After this change, the full name of these truncated kthreads will be
displayed via /proc/[pid]/comm:

    rcu_tasks_kthread
    rcu_tasks_rude_kthread
    rcu_tasks_trace_kthread
    poll_mpt3sas0_statu
    ext4-rsv-conversion
    xfs-reclaim/sdf1
    xfs-blockgc/sdf1
    xfs-inodegc/sdf1
    audit_send_reply
    ecryptfs-kthread
    vfio-irqfd-cleanup
    jbd2/nvme0n1p2-8

Link: https://lkml.kernel.org/r/20211120112850.46047-1-laoar.shao@gmail.com
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Suggested-by: Petr Mladek <pmladek@suse.com>
Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Arnaldo Carvalho de Melo <arnaldo.melo@gmail.com>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Michal Miroslaw <mirq-linux@rere.qmqm.pl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kthread.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 346b0f269161..2a5c04494663 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -33,6 +33,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
 					  unsigned int cpu,
 					  const char *namefmt);
 
+void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk);
 void set_kthread_struct(struct task_struct *p);
 
 void kthread_set_per_cpu(struct task_struct *k, int cpu);
-- 
cgit v1.2.3


From 0425473037db40d9e322631f2d4dc6ef51f97e88 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 19 Jan 2022 18:08:56 -0800
Subject: list: introduce list_is_head() helper and re-use it in list.h

Introduce list_is_head() in the similar (*) way as it's done for
list_entry_is_head().  Make use of it in the list.h.

*) it's done as inliner and not a macro to be aligned with other
   list_is_*() APIs; while at it, make all three to have the same
   style.

Link: https://lkml.kernel.org/r/20211201141824.81400-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list.h | 36 ++++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/list.h b/include/linux/list.h
index 6636fc07f918..dd6c2041d09c 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -258,8 +258,7 @@ static inline void list_bulk_move_tail(struct list_head *head,
  * @list: the entry to test
  * @head: the head of the list
  */
-static inline int list_is_first(const struct list_head *list,
-					const struct list_head *head)
+static inline int list_is_first(const struct list_head *list, const struct list_head *head)
 {
 	return list->prev == head;
 }
@@ -269,12 +268,21 @@ static inline int list_is_first(const struct list_head *list,
  * @list: the entry to test
  * @head: the head of the list
  */
-static inline int list_is_last(const struct list_head *list,
-				const struct list_head *head)
+static inline int list_is_last(const struct list_head *list, const struct list_head *head)
 {
 	return list->next == head;
 }
 
+/**
+ * list_is_head - tests whether @list is the list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+static inline int list_is_head(const struct list_head *list, const struct list_head *head)
+{
+	return list == head;
+}
+
 /**
  * list_empty - tests whether a list is empty
  * @head: the list to test.
@@ -318,7 +326,7 @@ static inline void list_del_init_careful(struct list_head *entry)
 static inline int list_empty_careful(const struct list_head *head)
 {
 	struct list_head *next = smp_load_acquire(&head->next);
-	return (next == head) && (next == head->prev);
+	return list_is_head(next, head) && (next == head->prev);
 }
 
 /**
@@ -393,10 +401,9 @@ static inline void list_cut_position(struct list_head *list,
 {
 	if (list_empty(head))
 		return;
-	if (list_is_singular(head) &&
-		(head->next != entry && head != entry))
+	if (list_is_singular(head) && !list_is_head(entry, head) && (entry != head->next))
 		return;
-	if (entry == head)
+	if (list_is_head(entry, head))
 		INIT_LIST_HEAD(list);
 	else
 		__list_cut_position(list, head, entry);
@@ -570,7 +577,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * @head:	the head for your list.
  */
 #define list_for_each(pos, head) \
-	for (pos = (head)->next; pos != (head); pos = pos->next)
+	for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next)
 
 /**
  * list_for_each_continue - continue iteration over a list
@@ -580,7 +587,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * Continue to iterate over a list, continuing after the current position.
  */
 #define list_for_each_continue(pos, head) \
-	for (pos = pos->next; pos != (head); pos = pos->next)
+	for (pos = pos->next; !list_is_head(pos, (head)); pos = pos->next)
 
 /**
  * list_for_each_prev	-	iterate over a list backwards
@@ -588,7 +595,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * @head:	the head for your list.
  */
 #define list_for_each_prev(pos, head) \
-	for (pos = (head)->prev; pos != (head); pos = pos->prev)
+	for (pos = (head)->prev; !list_is_head(pos, (head)); pos = pos->prev)
 
 /**
  * list_for_each_safe - iterate over a list safe against removal of list entry
@@ -597,8 +604,9 @@ static inline void list_splice_tail_init(struct list_head *list,
  * @head:	the head for your list.
  */
 #define list_for_each_safe(pos, n, head) \
-	for (pos = (head)->next, n = pos->next; pos != (head); \
-		pos = n, n = pos->next)
+	for (pos = (head)->next, n = pos->next; \
+	     !list_is_head(pos, (head)); \
+	     pos = n, n = pos->next)
 
 /**
  * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
@@ -608,7 +616,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  */
 #define list_for_each_prev_safe(pos, n, head) \
 	for (pos = (head)->prev, n = pos->prev; \
-	     pos != (head); \
+	     !list_is_head(pos, (head)); \
 	     pos = n, n = pos->prev)
 
 /**
-- 
cgit v1.2.3


From fd0a1462405b087377e59b84e119fe7e2d08499a Mon Sep 17 00:00:00 2001
From: Isabella Basso <isabbasso@riseup.net>
Date: Wed, 19 Jan 2022 18:09:02 -0800
Subject: hash.h: remove unused define directive
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "test_hash.c: refactor into KUnit", v3.

We refactored the lib/test_hash.c file into KUnit as part of the student
group LKCAMP [1] introductory hackathon for kernel development.

This test was pointed to our group by Daniel Latypov [2], so its full
conversion into a pure KUnit test was our goal in this patch series, but
we ran into many problems relating to it not being split as unit tests,
which complicated matters a bit, as the reasoning behind the original
tests is quite cryptic for those unfamiliar with hash implementations.

Some interesting developments we'd like to highlight are:

 - In patch 1/5 we noticed that there was an unused define directive
   that could be removed.

 - In patch 4/5 we noticed how stringhash and hash tests are all under
   the lib/test_hash.c file, which might cause some confusion, and we
   also broke those kernel config entries up.

Overall KUnit developments have been made in the other patches in this
series:

In patches 2/5, 3/5 and 5/5 we refactored the lib/test_hash.c file so as
to make it more compatible with the KUnit style, whilst preserving the
original idea of the maintainer who designed it (i.e.  George Spelvin),
which might be undesirable for unit tests, but we assume it is enough
for a first patch.

This patch (of 5):

Currently, there exist hash_32() and __hash_32() functions, which were
introduced in a patch [1] targeting architecture specific optimizations.
These functions can be overridden on a per-architecture basis to achieve
such optimizations.  They must set their corresponding define directive
(HAVE_ARCH_HASH_32 and HAVE_ARCH__HASH_32, respectively) so that header
files can deal with these overrides properly.

As the supported 32-bit architectures that have their own hash function
implementation (i.e.  m68k, Microblaze, H8/300, pa-risc) have only been
making use of the (more general) __hash_32() function (which only lacks
a right shift operation when compared to the hash_32() function), remove
the define directive corresponding to the arch-specific hash_32()
implementation.

[1] https://lore.kernel.org/lkml/20160525073311.5600.qmail@ns.sciencehorizons.net/

[akpm@linux-foundation.org: hash_32_generic() becomes hash_32()]

Link: https://lkml.kernel.org/r/20211208183711.390454-1-isabbasso@riseup.net
Link: https://lkml.kernel.org/r/20211208183711.390454-2-isabbasso@riseup.net
Reviewed-by: David Gow <davidgow@google.com>
Tested-by: David Gow <davidgow@google.com>
Co-developed-by: Augusto Durães Camargo <augusto.duraes33@gmail.com>
Signed-off-by: Augusto Durães Camargo <augusto.duraes33@gmail.com>
Co-developed-by: Enzo Ferreira <ferreiraenzoa@gmail.com>
Signed-off-by: Enzo Ferreira <ferreiraenzoa@gmail.com>
Signed-off-by: Isabella Basso <isabbasso@riseup.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Brendan Higgins <brendanhiggins@google.com>
Cc: Daniel Latypov <dlatypov@google.com>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Rodrigo Siqueira <rodrigosiqueiramelo@gmail.com>
Cc: kernel test robot <lkp@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hash.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/hash.h b/include/linux/hash.h
index ad6fa21d977b..38edaa08f862 100644
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -62,10 +62,7 @@ static inline u32 __hash_32_generic(u32 val)
 	return val * GOLDEN_RATIO_32;
 }
 
-#ifndef HAVE_ARCH_HASH_32
-#define hash_32 hash_32_generic
-#endif
-static inline u32 hash_32_generic(u32 val, unsigned int bits)
+static inline u32 hash_32(u32 val, unsigned int bits)
 {
 	/* High bits are more random, so use them. */
 	return __hash_32(val) >> (32 - bits);
-- 
cgit v1.2.3


From 60c7801b121aa0e90d8aae7245859aec0ce2306f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 19 Jan 2022 18:09:19 -0800
Subject: kunit: replace kernel.h with the necessary inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

Link: https://lkml.kernel.org/r/20211213204441.56204-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Tested-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/kunit/assert.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/kunit/assert.h b/include/kunit/assert.h
index ad889b539ab3..ccbc36c0b02f 100644
--- a/include/kunit/assert.h
+++ b/include/kunit/assert.h
@@ -10,7 +10,7 @@
 #define _KUNIT_ASSERT_H
 
 #include <linux/err.h>
-#include <linux/kernel.h>
+#include <linux/printk.h>
 
 struct kunit;
 struct string_stream;
-- 
cgit v1.2.3


From 8e930a66993be0a5f9a97c7c1c76ef09db4ef8bb Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 19 Jan 2022 18:09:22 -0800
Subject: uuid: discourage people from using UAPI header in new code

Discourage people from using UAPI header in new code by adding a note.

Link: https://lkml.kernel.org/r/20211216113552.81199-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/uuid.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/uuid.h b/include/uapi/linux/uuid.h
index e5a7eecef7c3..32615dc5f0cf 100644
--- a/include/uapi/linux/uuid.h
+++ b/include/uapi/linux/uuid.h
@@ -1,4 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* DO NOT USE in new code! This is solely for MEI due to legacy reasons */
 /*
  * UUID/GUID definition
  *
-- 
cgit v1.2.3


From c7e4289cbe668c2743ac0fd623a2518dbc191dc0 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 19 Jan 2022 18:09:25 -0800
Subject: uuid: remove licence boilerplate text from the header

Remove licence boilerplate text from the UAPI header.

Link: https://lkml.kernel.org/r/20211216113552.81199-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/uuid.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/uuid.h b/include/uapi/linux/uuid.h
index 32615dc5f0cf..c0f4bd9b040e 100644
--- a/include/uapi/linux/uuid.h
+++ b/include/uapi/linux/uuid.h
@@ -5,15 +5,6 @@
  *
  * Copyright (C) 2010, Intel Corp.
  *	Huang Ying <ying.huang@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation;
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #ifndef _UAPI_LINUX_UUID_H_
-- 
cgit v1.2.3


From 23b36fec7e14f8cf1c17e832e53dd4761e0dfe83 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Wed, 19 Jan 2022 18:09:56 -0800
Subject: panic: use error_report_end tracepoint on warnings

Introduce the error detector "warning" to the error_report event and use
the error_report_end tracepoint at the end of a warning report.

This allows in-kernel tests but also userspace to more easily determine
if a warning occurred without polling kernel logs.

[akpm@linux-foundation.org: add comma to enum list, per Andy]

Link: https://lkml.kernel.org/r/20211115085630.1756817-1-elver@google.com
Signed-off-by: Marco Elver <elver@google.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Alexander Popov <alex.popov@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/trace/events/error_report.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/error_report.h b/include/trace/events/error_report.h
index 96f64bf218b2..a1922a800e6f 100644
--- a/include/trace/events/error_report.h
+++ b/include/trace/events/error_report.h
@@ -17,14 +17,16 @@
 
 enum error_detector {
 	ERROR_DETECTOR_KFENCE,
-	ERROR_DETECTOR_KASAN
+	ERROR_DETECTOR_KASAN,
+	ERROR_DETECTOR_WARN,
 };
 
 #endif /* __ERROR_REPORT_DECLARE_TRACE_ENUMS_ONCE_ONLY */
 
-#define error_detector_list	\
+#define error_detector_list			\
 	EM(ERROR_DETECTOR_KFENCE, "kfence")	\
-	EMe(ERROR_DETECTOR_KASAN, "kasan")
+	EM(ERROR_DETECTOR_KASAN, "kasan")	\
+	EMe(ERROR_DETECTOR_WARN, "warning")
 /* Always end the list with an EMe. */
 
 #undef EM
-- 
cgit v1.2.3


From a3d5dc908a5f572ce3e31fe83fd2459a1c3c5422 Mon Sep 17 00:00:00 2001
From: Yang Yang <yang.yang29@zte.com.cn>
Date: Wed, 19 Jan 2022 18:10:02 -0800
Subject: delayacct: support swapin delay accounting for swapping without blkio

Currently delayacct accounts swapin delay only for swapping that cause
blkio.  If we use zram for swapping, tools/accounting/getdelays can't
get any SWAP delay.

It's useful to get zram swapin delay information, for example to adjust
compress algorithm or /proc/sys/vm/swappiness.

Reference to PSI, it accounts any kind of swapping by doing its work in
swap_readpage(), no matter whether swapping causes blkio.  Let delayacct
do the similar work.

Link: https://lkml.kernel.org/r/20211112083813.8559-1-yang.yang29@zte.com.cn
Signed-off-by: Yang Yang <yang.yang29@zte.com.cn>
Reported-by: Zeal Robot <zealci@zte.com.cn>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/delayacct.h | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index af7e6eb50283..b96d68f310a2 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -9,14 +9,6 @@
 
 #include <uapi/linux/taskstats.h>
 
-/*
- * Per-task flags relevant to delay accounting
- * maintained privately to avoid exhausting similar flags in sched.h:PF_*
- * Used to set current->delays->flags
- */
-#define DELAYACCT_PF_SWAPIN	0x00000001	/* I am doing a swapin */
-#define DELAYACCT_PF_BLKIO	0x00000002	/* I am waiting on IO */
-
 #ifdef CONFIG_TASK_DELAY_ACCT
 struct task_delay_info {
 	raw_spinlock_t	lock;
@@ -37,13 +29,13 @@ struct task_delay_info {
 	 * associated with the operation is added to XXX_delay.
 	 * XXX_delay contains the accumulated delay time in nanoseconds.
 	 */
-	u64 blkio_start;	/* Shared by blkio, swapin */
+	u64 blkio_start;
 	u64 blkio_delay;	/* wait for sync block io completion */
-	u64 swapin_delay;	/* wait for swapin block io completion */
+	u64 swapin_start;
+	u64 swapin_delay;	/* wait for swapin */
 	u32 blkio_count;	/* total count of the number of sync block */
 				/* io operations performed */
-	u32 swapin_count;	/* total count of the number of swapin block */
-				/* io operations performed */
+	u32 swapin_count;	/* total count of swapin */
 
 	u64 freepages_start;
 	u64 freepages_delay;	/* wait for memory reclaim */
@@ -79,14 +71,8 @@ extern void __delayacct_freepages_start(void);
 extern void __delayacct_freepages_end(void);
 extern void __delayacct_thrashing_start(void);
 extern void __delayacct_thrashing_end(void);
-
-static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
-{
-	if (p->delays)
-		return (p->delays->flags & DELAYACCT_PF_BLKIO);
-	else
-		return 0;
-}
+extern void __delayacct_swapin_start(void);
+extern void __delayacct_swapin_end(void);
 
 static inline void delayacct_set_flag(struct task_struct *p, int flag)
 {
@@ -123,7 +109,6 @@ static inline void delayacct_blkio_start(void)
 	if (!static_branch_unlikely(&delayacct_key))
 		return;
 
-	delayacct_set_flag(current, DELAYACCT_PF_BLKIO);
 	if (current->delays)
 		__delayacct_blkio_start();
 }
@@ -135,7 +120,6 @@ static inline void delayacct_blkio_end(struct task_struct *p)
 
 	if (p->delays)
 		__delayacct_blkio_end(p);
-	delayacct_clear_flag(p, DELAYACCT_PF_BLKIO);
 }
 
 static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
@@ -169,6 +153,18 @@ static inline void delayacct_thrashing_end(void)
 		__delayacct_thrashing_end();
 }
 
+static inline void delayacct_swapin_start(void)
+{
+	if (current->delays)
+		__delayacct_swapin_start();
+}
+
+static inline void delayacct_swapin_end(void)
+{
+	if (current->delays)
+		__delayacct_swapin_end();
+}
+
 #else
 static inline void delayacct_set_flag(struct task_struct *p, int flag)
 {}
@@ -199,6 +195,10 @@ static inline void delayacct_thrashing_start(void)
 {}
 static inline void delayacct_thrashing_end(void)
 {}
+static inline void delayacct_swapin_start(void)
+{}
+static inline void delayacct_swapin_end(void)
+{}
 
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
-- 
cgit v1.2.3


From 82065b7266899fbdce4c7394d7dd02688161f0cf Mon Sep 17 00:00:00 2001
From: Yang Yang <yang.yang29@zte.com.cn>
Date: Wed, 19 Jan 2022 18:10:06 -0800
Subject: delayacct: fix incomplete disable operation when switch enable to
 disable

When a task is created after delayacct is enabled, kernel will do all
the delay accountings for that task.  The problems is if user disables
delayacct by set /proc/sys/kernel/task_delayacct to zero, only blkio
delay accounting is disabled.

Now disable all the kinds of delay accountings when
/proc/sys/kernel/task_delayacct sets to zero.

Link: https://lkml.kernel.org/r/20211123140342.32962-1-ran.xiaokai@zte.com.cn
Signed-off-by: Yang Yang <yang.yang29@zte.com.cn>
Reported-by: Zeal Robot <zealci@zte.com.cn>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/delayacct.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index b96d68f310a2..c675cfb6437e 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -131,36 +131,54 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
 
 static inline void delayacct_freepages_start(void)
 {
+	if (!static_branch_unlikely(&delayacct_key))
+		return;
+
 	if (current->delays)
 		__delayacct_freepages_start();
 }
 
 static inline void delayacct_freepages_end(void)
 {
+	if (!static_branch_unlikely(&delayacct_key))
+		return;
+
 	if (current->delays)
 		__delayacct_freepages_end();
 }
 
 static inline void delayacct_thrashing_start(void)
 {
+	if (!static_branch_unlikely(&delayacct_key))
+		return;
+
 	if (current->delays)
 		__delayacct_thrashing_start();
 }
 
 static inline void delayacct_thrashing_end(void)
 {
+	if (!static_branch_unlikely(&delayacct_key))
+		return;
+
 	if (current->delays)
 		__delayacct_thrashing_end();
 }
 
 static inline void delayacct_swapin_start(void)
 {
+	if (!static_branch_unlikely(&delayacct_key))
+		return;
+
 	if (current->delays)
 		__delayacct_swapin_start();
 }
 
 static inline void delayacct_swapin_end(void)
 {
+	if (!static_branch_unlikely(&delayacct_key))
+		return;
+
 	if (current->delays)
 		__delayacct_swapin_end();
 }
-- 
cgit v1.2.3


From 1193829da1a6728249cd02577a020bd64fd9c160 Mon Sep 17 00:00:00 2001
From: Yang Yang <yang.yang29@zte.com.cn>
Date: Wed, 19 Jan 2022 18:10:09 -0800
Subject: delayacct: cleanup flags in struct task_delay_info and functions use
 it

Flags in struct task_delay_info is used to distinguish the difference
between swapin and blkio delay acountings.  But after patch "delayacct:
support swapin delay accounting for swapping without blkio", there is no
need to do that since swapin and blkio delay accounting use their own
functions.

Link: https://lkml.kernel.org/r/20211124065958.36703-1-yang.yang29@zte.com.cn
Signed-off-by: Yang Yang <yang.yang29@zte.com.cn>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zeal Robot <zealci@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/delayacct.h | 17 -----------------
 1 file changed, 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index c675cfb6437e..435c3654a0ff 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -12,7 +12,6 @@
 #ifdef CONFIG_TASK_DELAY_ACCT
 struct task_delay_info {
 	raw_spinlock_t	lock;
-	unsigned int	flags;	/* Private per-task flags */
 
 	/* For each stat XXX, add following, aligned appropriately
 	 *
@@ -74,18 +73,6 @@ extern void __delayacct_thrashing_end(void);
 extern void __delayacct_swapin_start(void);
 extern void __delayacct_swapin_end(void);
 
-static inline void delayacct_set_flag(struct task_struct *p, int flag)
-{
-	if (p->delays)
-		p->delays->flags |= flag;
-}
-
-static inline void delayacct_clear_flag(struct task_struct *p, int flag)
-{
-	if (p->delays)
-		p->delays->flags &= ~flag;
-}
-
 static inline void delayacct_tsk_init(struct task_struct *tsk)
 {
 	/* reinitialize in case parent's non-null pointer was dup'ed*/
@@ -184,10 +171,6 @@ static inline void delayacct_swapin_end(void)
 }
 
 #else
-static inline void delayacct_set_flag(struct task_struct *p, int flag)
-{}
-static inline void delayacct_clear_flag(struct task_struct *p, int flag)
-{}
 static inline void delayacct_init(void)
 {}
 static inline void delayacct_tsk_init(struct task_struct *tsk)
-- 
cgit v1.2.3


From 5bf18281534451bf1ad56a45a3085cd7ad46860d Mon Sep 17 00:00:00 2001
From: wangyong <wang.yong12@zte.com.cn>
Date: Wed, 19 Jan 2022 18:10:15 -0800
Subject: delayacct: track delays from memory compact

Delay accounting does not track the delay of memory compact.  When there
is not enough free memory, tasks can spend a amount of their time
waiting for compact.

To get the impact of tasks in direct memory compact, measure the delay
when allocating memory through memory compact.

Also update tools/accounting/getdelays.c:

    / # ./getdelays_next  -di -p 304
    print delayacct stats ON
    printing IO accounting
    PID     304

    CPU             count     real total  virtual total    delay total  delay average
                      277      780000000      849039485       18877296          0.068ms
    IO              count    delay total  delay average
                        0              0              0ms
    SWAP            count    delay total  delay average
                        0              0              0ms
    RECLAIM         count    delay total  delay average
                        5    11088812685           2217ms
    THRASHING       count    delay total  delay average
                        0              0              0ms
    COMPACT         count    delay total  delay average
                        3          72758              0ms
    watch: read=0, write=0, cancelled_write=0

Link: https://lkml.kernel.org/r/1638619795-71451-1-git-send-email-wang.yong12@zte.com.cn
Signed-off-by: wangyong <wang.yong12@zte.com.cn>
Reviewed-by: Jiang Xuexin <jiang.xuexin@zte.com.cn>
Reviewed-by: Zhang Wenya <zhang.wenya1@zte.com.cn>
Reviewed-by: Yang Yang <yang.yang29@zte.com.cn>
Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/delayacct.h      | 28 ++++++++++++++++++++++++++++
 include/uapi/linux/taskstats.h |  6 +++++-
 2 files changed, 33 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 435c3654a0ff..3e03d010bd2e 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -42,8 +42,12 @@ struct task_delay_info {
 	u64 thrashing_start;
 	u64 thrashing_delay;	/* wait for thrashing page */
 
+	u64 compact_start;
+	u64 compact_delay;	/* wait for memory compact */
+
 	u32 freepages_count;	/* total count of memory reclaim */
 	u32 thrashing_count;	/* total count of thrash waits */
+	u32 compact_count;	/* total count of memory compact */
 };
 #endif
 
@@ -72,6 +76,8 @@ extern void __delayacct_thrashing_start(void);
 extern void __delayacct_thrashing_end(void);
 extern void __delayacct_swapin_start(void);
 extern void __delayacct_swapin_end(void);
+extern void __delayacct_compact_start(void);
+extern void __delayacct_compact_end(void);
 
 static inline void delayacct_tsk_init(struct task_struct *tsk)
 {
@@ -170,6 +176,24 @@ static inline void delayacct_swapin_end(void)
 		__delayacct_swapin_end();
 }
 
+static inline void delayacct_compact_start(void)
+{
+	if (!static_branch_unlikely(&delayacct_key))
+		return;
+
+	if (current->delays)
+		__delayacct_compact_start();
+}
+
+static inline void delayacct_compact_end(void)
+{
+	if (!static_branch_unlikely(&delayacct_key))
+		return;
+
+	if (current->delays)
+		__delayacct_compact_end();
+}
+
 #else
 static inline void delayacct_init(void)
 {}
@@ -200,6 +224,10 @@ static inline void delayacct_swapin_start(void)
 {}
 static inline void delayacct_swapin_end(void)
 {}
+static inline void delayacct_compact_start(void)
+{}
+static inline void delayacct_compact_end(void)
+{}
 
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h
index ccbd08709321..12327d32378f 100644
--- a/include/uapi/linux/taskstats.h
+++ b/include/uapi/linux/taskstats.h
@@ -34,7 +34,7 @@
  */
 
 
-#define TASKSTATS_VERSION	10
+#define TASKSTATS_VERSION	11
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
@@ -172,6 +172,10 @@ struct taskstats {
 
 	/* v10: 64-bit btime to avoid overflow */
 	__u64	ac_btime64;		/* 64-bit begin time */
+
+	/* Delay waiting for memory compact */
+	__u64	compact_count;
+	__u64	compact_delay_total;
 };
 
 
-- 
cgit v1.2.3


From 440323b6cf5b9896013a78c4f578823e8243a7fd Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Fri, 14 Jan 2022 18:58:57 +0800
Subject: asm-generic: Add missing brackets for io_stop_wc macro
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After using io_stop_wc(), drivers reports following compile error when
compiled on X86.

  drivers/net/ethernet/hisilicon/hns3/hns3_enet.c: In function ‘hns3_tx_push_bd’:
  drivers/net/ethernet/hisilicon/hns3/hns3_enet.c:2058:12: error: expected ‘;’ before ‘(’ token
    io_stop_wc();
              ^
It is because I missed to add the brackets after io_stop_wc macro. So
let's add the missing brackets.

Fixes: d5624bb29f49 ("asm-generic: introduce io_stop_wc() and add implementation for ARM64")
Reported-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Link: https://lore.kernel.org/r/20220114105857.126300-1-wangxiongfeng2@huawei.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/asm-generic/barrier.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 4c2c1b830344..3385ca5ca5c0 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -259,7 +259,7 @@ do {									\
  * write-combining memory accesses before this macro with those after it.
  */
 #ifndef io_stop_wc
-#define io_stop_wc do { } while (0)
+#define io_stop_wc() do { } while (0)
 #endif
 
 #endif /* !__ASSEMBLY__ */
-- 
cgit v1.2.3


From 66a8f7f04979f4ad739085f01d99c8caf620b4f5 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Tue, 18 Jan 2022 18:35:02 +0100
Subject: of: base: make small of_parse_phandle() variants static inline

Make all the smaller variants of the of_parse_phandle() static inline.
This also let us remove the empty function stubs if CONFIG_OF is not
defined.

Suggested-by: Rob Herring <robh@kernel.org>
Signed-off-by: Michael Walle <michael@walle.cc>
[robh: move index < 0 check into __of_parse_phandle_with_args]
Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20220118173504.2867523-2-michael@walle.cc
---
 include/linux/of.h | 148 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 120 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/of.h b/include/linux/of.h
index ff143a027abc..16d76c92fbe0 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -364,18 +364,12 @@ extern const struct of_device_id *of_match_node(
 	const struct of_device_id *matches, const struct device_node *node);
 extern int of_modalias_node(struct device_node *node, char *modalias, int len);
 extern void of_print_phandle_args(const char *msg, const struct of_phandle_args *args);
-extern struct device_node *of_parse_phandle(const struct device_node *np,
-					    const char *phandle_name,
-					    int index);
-extern int of_parse_phandle_with_args(const struct device_node *np,
-	const char *list_name, const char *cells_name, int index,
-	struct of_phandle_args *out_args);
+extern int __of_parse_phandle_with_args(const struct device_node *np,
+	const char *list_name, const char *cells_name, int cell_count,
+	int index, struct of_phandle_args *out_args);
 extern int of_parse_phandle_with_args_map(const struct device_node *np,
 	const char *list_name, const char *stem_name, int index,
 	struct of_phandle_args *out_args);
-extern int of_parse_phandle_with_fixed_args(const struct device_node *np,
-	const char *list_name, int cells_count, int index,
-	struct of_phandle_args *out_args);
 extern int of_count_phandle_with_args(const struct device_node *np,
 	const char *list_name, const char *cells_name);
 
@@ -865,18 +859,12 @@ static inline int of_property_read_string_helper(const struct device_node *np,
 	return -ENOSYS;
 }
 
-static inline struct device_node *of_parse_phandle(const struct device_node *np,
-						   const char *phandle_name,
-						   int index)
-{
-	return NULL;
-}
-
-static inline int of_parse_phandle_with_args(const struct device_node *np,
-					     const char *list_name,
-					     const char *cells_name,
-					     int index,
-					     struct of_phandle_args *out_args)
+static inline int __of_parse_phandle_with_args(const struct device_node *np,
+					       const char *list_name,
+					       const char *cells_name,
+					       int cell_count,
+					       int index,
+					       struct of_phandle_args *out_args)
 {
 	return -ENOSYS;
 }
@@ -890,13 +878,6 @@ static inline int of_parse_phandle_with_args_map(const struct device_node *np,
 	return -ENOSYS;
 }
 
-static inline int of_parse_phandle_with_fixed_args(const struct device_node *np,
-	const char *list_name, int cells_count, int index,
-	struct of_phandle_args *out_args)
-{
-	return -ENOSYS;
-}
-
 static inline int of_count_phandle_with_args(const struct device_node *np,
 					     const char *list_name,
 					     const char *cells_name)
@@ -1077,6 +1058,117 @@ static inline bool of_node_is_type(const struct device_node *np, const char *typ
 	return np && match && type && !strcmp(match, type);
 }
 
+/**
+ * of_parse_phandle - Resolve a phandle property to a device_node pointer
+ * @np: Pointer to device node holding phandle property
+ * @phandle_name: Name of property holding a phandle value
+ * @index: For properties holding a table of phandles, this is the index into
+ *         the table
+ *
+ * Return: The device_node pointer with refcount incremented.  Use
+ * of_node_put() on it when done.
+ */
+static inline struct device_node *of_parse_phandle(const struct device_node *np,
+						   const char *phandle_name,
+						   int index)
+{
+	struct of_phandle_args args;
+
+	if (__of_parse_phandle_with_args(np, phandle_name, NULL, 0,
+					 index, &args))
+		return NULL;
+
+	return args.np;
+}
+
+/**
+ * of_parse_phandle_with_args() - Find a node pointed by phandle in a list
+ * @np:		pointer to a device tree node containing a list
+ * @list_name:	property name that contains a list
+ * @cells_name:	property name that specifies phandles' arguments count
+ * @index:	index of a phandle to parse out
+ * @out_args:	optional pointer to output arguments structure (will be filled)
+ *
+ * This function is useful to parse lists of phandles and their arguments.
+ * Returns 0 on success and fills out_args, on error returns appropriate
+ * errno value.
+ *
+ * Caller is responsible to call of_node_put() on the returned out_args->np
+ * pointer.
+ *
+ * Example::
+ *
+ *  phandle1: node1 {
+ *	#list-cells = <2>;
+ *  };
+ *
+ *  phandle2: node2 {
+ *	#list-cells = <1>;
+ *  };
+ *
+ *  node3 {
+ *	list = <&phandle1 1 2 &phandle2 3>;
+ *  };
+ *
+ * To get a device_node of the ``node2`` node you may call this:
+ * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args);
+ */
+static inline int of_parse_phandle_with_args(const struct device_node *np,
+					     const char *list_name,
+					     const char *cells_name,
+					     int index,
+					     struct of_phandle_args *out_args)
+{
+	int cell_count = -1;
+
+	/* If cells_name is NULL we assume a cell count of 0 */
+	if (!cells_name)
+		cell_count = 0;
+
+	return __of_parse_phandle_with_args(np, list_name, cells_name,
+					    cell_count, index, out_args);
+}
+
+/**
+ * of_parse_phandle_with_fixed_args() - Find a node pointed by phandle in a list
+ * @np:		pointer to a device tree node containing a list
+ * @list_name:	property name that contains a list
+ * @cell_count: number of argument cells following the phandle
+ * @index:	index of a phandle to parse out
+ * @out_args:	optional pointer to output arguments structure (will be filled)
+ *
+ * This function is useful to parse lists of phandles and their arguments.
+ * Returns 0 on success and fills out_args, on error returns appropriate
+ * errno value.
+ *
+ * Caller is responsible to call of_node_put() on the returned out_args->np
+ * pointer.
+ *
+ * Example::
+ *
+ *  phandle1: node1 {
+ *  };
+ *
+ *  phandle2: node2 {
+ *  };
+ *
+ *  node3 {
+ *	list = <&phandle1 0 2 &phandle2 2 3>;
+ *  };
+ *
+ * To get a device_node of the ``node2`` node you may call this:
+ * of_parse_phandle_with_fixed_args(node3, "list", 2, 1, &args);
+ */
+static inline int of_parse_phandle_with_fixed_args(const struct device_node *np,
+						   const char *list_name,
+						   int cell_count,
+						   int index,
+						   struct of_phandle_args *out_args)
+{
+	return __of_parse_phandle_with_args(np, list_name, NULL, cell_count,
+					    index, out_args);
+}
+
 /**
  * of_property_count_u8_elems - Count the number of u8 elements in a property
  *
-- 
cgit v1.2.3


From 2ca42c3ad9ed875b136065b010753a4caaaa1d38 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Tue, 18 Jan 2022 18:35:03 +0100
Subject: of: property: define of_property_read_u{8,16,32,64}_array()
 unconditionally

We can get rid of all the empty stubs because all these functions call
of_property_read_variable_u{8,16,32,64}_array() which already have an
empty stub if CONFIG_OF is not defined.

Signed-off-by: Michael Walle <michael@walle.cc>
Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20220118173504.2867523-3-michael@walle.cc
---
 include/linux/of.h | 274 ++++++++++++++++++++++++-----------------------------
 1 file changed, 124 insertions(+), 150 deletions(-)

(limited to 'include')

diff --git a/include/linux/of.h b/include/linux/of.h
index 16d76c92fbe0..2dc77430a91a 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -410,130 +410,6 @@ extern int of_detach_node(struct device_node *);
 
 #define of_match_ptr(_ptr)	(_ptr)
 
-/**
- * of_property_read_u8_array - Find and read an array of u8 from a property.
- *
- * @np:		device node from which the property value is to be read.
- * @propname:	name of the property to be searched.
- * @out_values:	pointer to return value, modified only if return value is 0.
- * @sz:		number of array elements to read
- *
- * Search for a property in a device node and read 8-bit value(s) from
- * it.
- *
- * dts entry of array should be like:
- *  ``property = /bits/ 8 <0x50 0x60 0x70>;``
- *
- * Return: 0 on success, -EINVAL if the property does not exist,
- * -ENODATA if property does not have a value, and -EOVERFLOW if the
- * property data isn't large enough.
- *
- * The out_values is modified only if a valid u8 value can be decoded.
- */
-static inline int of_property_read_u8_array(const struct device_node *np,
-					    const char *propname,
-					    u8 *out_values, size_t sz)
-{
-	int ret = of_property_read_variable_u8_array(np, propname, out_values,
-						     sz, 0);
-	if (ret >= 0)
-		return 0;
-	else
-		return ret;
-}
-
-/**
- * of_property_read_u16_array - Find and read an array of u16 from a property.
- *
- * @np:		device node from which the property value is to be read.
- * @propname:	name of the property to be searched.
- * @out_values:	pointer to return value, modified only if return value is 0.
- * @sz:		number of array elements to read
- *
- * Search for a property in a device node and read 16-bit value(s) from
- * it.
- *
- * dts entry of array should be like:
- *  ``property = /bits/ 16 <0x5000 0x6000 0x7000>;``
- *
- * Return: 0 on success, -EINVAL if the property does not exist,
- * -ENODATA if property does not have a value, and -EOVERFLOW if the
- * property data isn't large enough.
- *
- * The out_values is modified only if a valid u16 value can be decoded.
- */
-static inline int of_property_read_u16_array(const struct device_node *np,
-					     const char *propname,
-					     u16 *out_values, size_t sz)
-{
-	int ret = of_property_read_variable_u16_array(np, propname, out_values,
-						      sz, 0);
-	if (ret >= 0)
-		return 0;
-	else
-		return ret;
-}
-
-/**
- * of_property_read_u32_array - Find and read an array of 32 bit integers
- * from a property.
- *
- * @np:		device node from which the property value is to be read.
- * @propname:	name of the property to be searched.
- * @out_values:	pointer to return value, modified only if return value is 0.
- * @sz:		number of array elements to read
- *
- * Search for a property in a device node and read 32-bit value(s) from
- * it.
- *
- * Return: 0 on success, -EINVAL if the property does not exist,
- * -ENODATA if property does not have a value, and -EOVERFLOW if the
- * property data isn't large enough.
- *
- * The out_values is modified only if a valid u32 value can be decoded.
- */
-static inline int of_property_read_u32_array(const struct device_node *np,
-					     const char *propname,
-					     u32 *out_values, size_t sz)
-{
-	int ret = of_property_read_variable_u32_array(np, propname, out_values,
-						      sz, 0);
-	if (ret >= 0)
-		return 0;
-	else
-		return ret;
-}
-
-/**
- * of_property_read_u64_array - Find and read an array of 64 bit integers
- * from a property.
- *
- * @np:		device node from which the property value is to be read.
- * @propname:	name of the property to be searched.
- * @out_values:	pointer to return value, modified only if return value is 0.
- * @sz:		number of array elements to read
- *
- * Search for a property in a device node and read 64-bit value(s) from
- * it.
- *
- * Return: 0 on success, -EINVAL if the property does not exist,
- * -ENODATA if property does not have a value, and -EOVERFLOW if the
- * property data isn't large enough.
- *
- * The out_values is modified only if a valid u64 value can be decoded.
- */
-static inline int of_property_read_u64_array(const struct device_node *np,
-					     const char *propname,
-					     u64 *out_values, size_t sz)
-{
-	int ret = of_property_read_variable_u64_array(np, propname, out_values,
-						      sz, 0);
-	if (ret >= 0)
-		return 0;
-	else
-		return ret;
-}
-
 /*
  * struct property *prop;
  * const __be32 *p;
@@ -728,32 +604,6 @@ static inline int of_property_count_elems_of_size(const struct device_node *np,
 	return -ENOSYS;
 }
 
-static inline int of_property_read_u8_array(const struct device_node *np,
-			const char *propname, u8 *out_values, size_t sz)
-{
-	return -ENOSYS;
-}
-
-static inline int of_property_read_u16_array(const struct device_node *np,
-			const char *propname, u16 *out_values, size_t sz)
-{
-	return -ENOSYS;
-}
-
-static inline int of_property_read_u32_array(const struct device_node *np,
-					     const char *propname,
-					     u32 *out_values, size_t sz)
-{
-	return -ENOSYS;
-}
-
-static inline int of_property_read_u64_array(const struct device_node *np,
-					     const char *propname,
-					     u64 *out_values, size_t sz)
-{
-	return -ENOSYS;
-}
-
 static inline int of_property_read_u32_index(const struct device_node *np,
 			const char *propname, u32 index, u32 *out_value)
 {
@@ -1328,6 +1178,130 @@ static inline bool of_property_read_bool(const struct device_node *np,
 	return prop ? true : false;
 }
 
+/**
+ * of_property_read_u8_array - Find and read an array of u8 from a property.
+ *
+ * @np:		device node from which the property value is to be read.
+ * @propname:	name of the property to be searched.
+ * @out_values:	pointer to return value, modified only if return value is 0.
+ * @sz:		number of array elements to read
+ *
+ * Search for a property in a device node and read 8-bit value(s) from
+ * it.
+ *
+ * dts entry of array should be like:
+ *  ``property = /bits/ 8 <0x50 0x60 0x70>;``
+ *
+ * Return: 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ *
+ * The out_values is modified only if a valid u8 value can be decoded.
+ */
+static inline int of_property_read_u8_array(const struct device_node *np,
+					    const char *propname,
+					    u8 *out_values, size_t sz)
+{
+	int ret = of_property_read_variable_u8_array(np, propname, out_values,
+						     sz, 0);
+	if (ret >= 0)
+		return 0;
+	else
+		return ret;
+}
+
+/**
+ * of_property_read_u16_array - Find and read an array of u16 from a property.
+ *
+ * @np:		device node from which the property value is to be read.
+ * @propname:	name of the property to be searched.
+ * @out_values:	pointer to return value, modified only if return value is 0.
+ * @sz:		number of array elements to read
+ *
+ * Search for a property in a device node and read 16-bit value(s) from
+ * it.
+ *
+ * dts entry of array should be like:
+ *  ``property = /bits/ 16 <0x5000 0x6000 0x7000>;``
+ *
+ * Return: 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ *
+ * The out_values is modified only if a valid u16 value can be decoded.
+ */
+static inline int of_property_read_u16_array(const struct device_node *np,
+					     const char *propname,
+					     u16 *out_values, size_t sz)
+{
+	int ret = of_property_read_variable_u16_array(np, propname, out_values,
+						      sz, 0);
+	if (ret >= 0)
+		return 0;
+	else
+		return ret;
+}
+
+/**
+ * of_property_read_u32_array - Find and read an array of 32 bit integers
+ * from a property.
+ *
+ * @np:		device node from which the property value is to be read.
+ * @propname:	name of the property to be searched.
+ * @out_values:	pointer to return value, modified only if return value is 0.
+ * @sz:		number of array elements to read
+ *
+ * Search for a property in a device node and read 32-bit value(s) from
+ * it.
+ *
+ * Return: 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ *
+ * The out_values is modified only if a valid u32 value can be decoded.
+ */
+static inline int of_property_read_u32_array(const struct device_node *np,
+					     const char *propname,
+					     u32 *out_values, size_t sz)
+{
+	int ret = of_property_read_variable_u32_array(np, propname, out_values,
+						      sz, 0);
+	if (ret >= 0)
+		return 0;
+	else
+		return ret;
+}
+
+/**
+ * of_property_read_u64_array - Find and read an array of 64 bit integers
+ * from a property.
+ *
+ * @np:		device node from which the property value is to be read.
+ * @propname:	name of the property to be searched.
+ * @out_values:	pointer to return value, modified only if return value is 0.
+ * @sz:		number of array elements to read
+ *
+ * Search for a property in a device node and read 64-bit value(s) from
+ * it.
+ *
+ * Return: 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ *
+ * The out_values is modified only if a valid u64 value can be decoded.
+ */
+static inline int of_property_read_u64_array(const struct device_node *np,
+					     const char *propname,
+					     u64 *out_values, size_t sz)
+{
+	int ret = of_property_read_variable_u64_array(np, propname, out_values,
+						      sz, 0);
+	if (ret >= 0)
+		return 0;
+	else
+		return ret;
+}
+
 static inline int of_property_read_u8(const struct device_node *np,
 				       const char *propname,
 				       u8 *out_value)
-- 
cgit v1.2.3


From 8c39b8bc82aafcc8dd378bd79c76fac8e8a89c8d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Jan 2022 11:44:54 +0000
Subject: cachefiles: Make some tracepoint adjustments

Make some adjustments to tracepoints to make the tracing a bit more
followable:

 (1) Standardise on displaying the backing inode number as "B=<hex>" with
     no leading zeros.

 (2) Make the cachefiles_lookup tracepoint log the directory inode number
     as well as the looked-up inode number.

 (3) Add a cachefiles_lookup tracepoint into cachefiles_get_directory() to
     log directory lookup.

 (4) Add a new cachefiles_mkdir tracepoint and use that to log a successful
     mkdir from cachefiles_get_directory().

 (5) Make the cachefiles_unlink and cachefiles_rename tracepoints log the
     inode number of the affected file/dir rather than dentry struct
     pointers.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/164251403694.3435901.9797725381831316715.stgit@warthog.procyon.org.uk/ # v1
---
 include/trace/events/cachefiles.h | 82 ++++++++++++++++++++++++---------------
 1 file changed, 51 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index 1172529b5b49..093c4acb7a3a 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -233,25 +233,48 @@ TRACE_EVENT(cachefiles_ref,
 
 TRACE_EVENT(cachefiles_lookup,
 	    TP_PROTO(struct cachefiles_object *obj,
+		     struct dentry *dir,
 		     struct dentry *de),
 
-	    TP_ARGS(obj, de),
+	    TP_ARGS(obj, dir, de),
 
 	    TP_STRUCT__entry(
 		    __field(unsigned int,		obj	)
 		    __field(short,			error	)
+		    __field(unsigned long,		dino	)
 		    __field(unsigned long,		ino	)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->obj	= obj->debug_id;
+		    __entry->obj	= obj ? obj->debug_id : 0;
+		    __entry->dino	= d_backing_inode(dir)->i_ino;
 		    __entry->ino	= (!IS_ERR(de) && d_backing_inode(de) ?
 					   d_backing_inode(de)->i_ino : 0);
 		    __entry->error	= IS_ERR(de) ? PTR_ERR(de) : 0;
 			   ),
 
-	    TP_printk("o=%08x i=%lx e=%d",
-		      __entry->obj, __entry->ino, __entry->error)
+	    TP_printk("o=%08x dB=%lx B=%lx e=%d",
+		      __entry->obj, __entry->dino, __entry->ino, __entry->error)
+	    );
+
+TRACE_EVENT(cachefiles_mkdir,
+	    TP_PROTO(struct dentry *dir, struct dentry *subdir),
+
+	    TP_ARGS(dir, subdir),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,			dir	)
+		    __field(unsigned int,			subdir	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->dir	= d_backing_inode(dir)->i_ino;
+		    __entry->subdir	= d_backing_inode(subdir)->i_ino;
+			   ),
+
+	    TP_printk("dB=%x sB=%x",
+		      __entry->dir,
+		      __entry->subdir)
 	    );
 
 TRACE_EVENT(cachefiles_tmpfile,
@@ -269,7 +292,7 @@ TRACE_EVENT(cachefiles_tmpfile,
 		    __entry->backer	= backer->i_ino;
 			   ),
 
-	    TP_printk("o=%08x b=%08x",
+	    TP_printk("o=%08x B=%x",
 		      __entry->obj,
 		      __entry->backer)
 	    );
@@ -289,61 +312,58 @@ TRACE_EVENT(cachefiles_link,
 		    __entry->backer	= backer->i_ino;
 			   ),
 
-	    TP_printk("o=%08x b=%08x",
+	    TP_printk("o=%08x B=%x",
 		      __entry->obj,
 		      __entry->backer)
 	    );
 
 TRACE_EVENT(cachefiles_unlink,
 	    TP_PROTO(struct cachefiles_object *obj,
-		     struct dentry *de,
+		     ino_t ino,
 		     enum fscache_why_object_killed why),
 
-	    TP_ARGS(obj, de, why),
+	    TP_ARGS(obj, ino, why),
 
 	    /* Note that obj may be NULL */
 	    TP_STRUCT__entry(
 		    __field(unsigned int,		obj		)
-		    __field(struct dentry *,		de		)
+		    __field(unsigned int,		ino		)
 		    __field(enum fscache_why_object_killed, why		)
 			     ),
 
 	    TP_fast_assign(
 		    __entry->obj	= obj ? obj->debug_id : UINT_MAX;
-		    __entry->de		= de;
+		    __entry->ino	= ino;
 		    __entry->why	= why;
 			   ),
 
-	    TP_printk("o=%08x d=%p w=%s",
-		      __entry->obj, __entry->de,
+	    TP_printk("o=%08x B=%x w=%s",
+		      __entry->obj, __entry->ino,
 		      __print_symbolic(__entry->why, cachefiles_obj_kill_traces))
 	    );
 
 TRACE_EVENT(cachefiles_rename,
 	    TP_PROTO(struct cachefiles_object *obj,
-		     struct dentry *de,
-		     struct dentry *to,
+		     ino_t ino,
 		     enum fscache_why_object_killed why),
 
-	    TP_ARGS(obj, de, to, why),
+	    TP_ARGS(obj, ino, why),
 
 	    /* Note that obj may be NULL */
 	    TP_STRUCT__entry(
 		    __field(unsigned int,		obj		)
-		    __field(struct dentry *,		de		)
-		    __field(struct dentry *,		to		)
+		    __field(unsigned int,		ino		)
 		    __field(enum fscache_why_object_killed, why		)
 			     ),
 
 	    TP_fast_assign(
 		    __entry->obj	= obj ? obj->debug_id : UINT_MAX;
-		    __entry->de		= de;
-		    __entry->to		= to;
+		    __entry->ino	= ino;
 		    __entry->why	= why;
 			   ),
 
-	    TP_printk("o=%08x d=%p t=%p w=%s",
-		      __entry->obj, __entry->de, __entry->to,
+	    TP_printk("o=%08x B=%x w=%s",
+		      __entry->obj, __entry->ino,
 		      __print_symbolic(__entry->why, cachefiles_obj_kill_traces))
 	    );
 
@@ -370,7 +390,7 @@ TRACE_EVENT(cachefiles_coherency,
 		    __entry->ino	= ino;
 			   ),
 
-	    TP_printk("o=%08x %s i=%llx c=%u",
+	    TP_printk("o=%08x %s B=%llx c=%u",
 		      __entry->obj,
 		      __print_symbolic(__entry->why, cachefiles_coherency_traces),
 		      __entry->ino,
@@ -397,7 +417,7 @@ TRACE_EVENT(cachefiles_vol_coherency,
 		    __entry->ino	= ino;
 			   ),
 
-	    TP_printk("V=%08x %s i=%llx",
+	    TP_printk("V=%08x %s B=%llx",
 		      __entry->vol,
 		      __print_symbolic(__entry->why, cachefiles_coherency_traces),
 		      __entry->ino)
@@ -435,7 +455,7 @@ TRACE_EVENT(cachefiles_prep_read,
 		    __entry->cache_inode = cache_inode;
 			   ),
 
-	    TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx ni=%x b=%x",
+	    TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx ni=%x B=%x",
 		      __entry->rreq, __entry->index,
 		      __print_symbolic(__entry->source, netfs_sreq_sources),
 		      __print_symbolic(__entry->why, cachefiles_prepare_read_traces),
@@ -466,7 +486,7 @@ TRACE_EVENT(cachefiles_read,
 		    __entry->len	= len;
 			   ),
 
-	    TP_printk("o=%08x b=%08x s=%llx l=%zx",
+	    TP_printk("o=%08x B=%x s=%llx l=%zx",
 		      __entry->obj,
 		      __entry->backer,
 		      __entry->start,
@@ -495,7 +515,7 @@ TRACE_EVENT(cachefiles_write,
 		    __entry->len	= len;
 			   ),
 
-	    TP_printk("o=%08x b=%08x s=%llx l=%zx",
+	    TP_printk("o=%08x B=%x s=%llx l=%zx",
 		      __entry->obj,
 		      __entry->backer,
 		      __entry->start,
@@ -524,7 +544,7 @@ TRACE_EVENT(cachefiles_trunc,
 		    __entry->why	= why;
 			   ),
 
-	    TP_printk("o=%08x b=%08x %s l=%llx->%llx",
+	    TP_printk("o=%08x B=%x %s l=%llx->%llx",
 		      __entry->obj,
 		      __entry->backer,
 		      __print_symbolic(__entry->why, cachefiles_trunc_traces),
@@ -549,7 +569,7 @@ TRACE_EVENT(cachefiles_mark_active,
 		    __entry->inode	= inode->i_ino;
 			   ),
 
-	    TP_printk("o=%08x i=%lx",
+	    TP_printk("o=%08x B=%lx",
 		      __entry->obj, __entry->inode)
 	    );
 
@@ -570,7 +590,7 @@ TRACE_EVENT(cachefiles_mark_inactive,
 		    __entry->inode	= inode->i_ino;
 			   ),
 
-	    TP_printk("o=%08x i=%lx",
+	    TP_printk("o=%08x B=%lx",
 		      __entry->obj, __entry->inode)
 	    );
 
@@ -594,7 +614,7 @@ TRACE_EVENT(cachefiles_vfs_error,
 		    __entry->where	= where;
 			   ),
 
-	    TP_printk("o=%08x b=%08x %s e=%d",
+	    TP_printk("o=%08x B=%x %s e=%d",
 		      __entry->obj,
 		      __entry->backer,
 		      __print_symbolic(__entry->where, cachefiles_error_traces),
@@ -621,7 +641,7 @@ TRACE_EVENT(cachefiles_io_error,
 		    __entry->where	= where;
 			   ),
 
-	    TP_printk("o=%08x b=%08x %s e=%d",
+	    TP_printk("o=%08x B=%x %s e=%d",
 		      __entry->obj,
 		      __entry->backer,
 		      __print_symbolic(__entry->where, cachefiles_error_traces),
-- 
cgit v1.2.3


From b64a3314989df8e44c114f377808407f36dbf4f4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Jan 2022 11:05:13 +0000
Subject: cachefiles: Trace active-mark failure

Add a tracepoint to log failure to apply an active mark to a file in
addition to tracing successfully setting and unsetting the mark.

Also include the backing file inode number in the message logged to dmesg.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/164251404666.3435901.17331742792401482190.stgit@warthog.procyon.org.uk/ # v1
---
 include/trace/events/cachefiles.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index 093c4acb7a3a..c6f5aa74db89 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -573,6 +573,27 @@ TRACE_EVENT(cachefiles_mark_active,
 		      __entry->obj, __entry->inode)
 	    );
 
+TRACE_EVENT(cachefiles_mark_failed,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct inode *inode),
+
+	    TP_ARGS(obj, inode),
+
+	    /* Note that obj may be NULL */
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		obj		)
+		    __field(ino_t,			inode		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj ? obj->debug_id : 0;
+		    __entry->inode	= inode->i_ino;
+			   ),
+
+	    TP_printk("o=%08x B=%lx",
+		      __entry->obj, __entry->inode)
+	    );
+
 TRACE_EVENT(cachefiles_mark_inactive,
 	    TP_PROTO(struct cachefiles_object *obj,
 		     struct inode *inode),
-- 
cgit v1.2.3


From c522e3ad296b7b692ed3960dfde467f2a34b434f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 7 Jan 2022 09:28:41 +0000
Subject: fscache: Add a comment explaining how page-release optimisation works

Add a comment into fscache_note_page_release() to explain how the
page-release optimisation logic works[1].  It's not entirely obvious as it
has nothing to do with whether or not the netfs file contains data.

FSCACHE_COOKIE_NO_DATA_TO_READ is set if we have no data in the cache yet
(ie. the backing file lookup was negative, the file is 0 length or the
cookie got invalidated).  It means that we have no data in the cache, not
that the file is necessarily empty on the server.

FSCACHE_COOKIE_HAVE_DATA is set once we've stored data in the backing file.
From that point on, we have data we *could* read - however, it's covered by
pages in the netfs pagecache until at such time one of those covering pages
is released.

So if we've written data to the cache (HAVE_DATA) and there wasn't any data
in the cache when we started (NO_DATA_TO_READ), it may no longer be true
that we can skip reading from the cache.

Read skipping is done by cachefiles_prepare_read().

Note that tracking is not done on a per-page basis, but only on a per-file
basis.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/043a206f03929c2667a465314144e518070a9b2d.camel@kernel.org/ [1]
Link: https://lore.kernel.org/r/164251408479.3435901.9540165422908194636.stgit@warthog.procyon.org.uk/ # v1
---
 include/linux/fscache.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index ede50406bcb0..296c5f1d9f35 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -665,6 +665,11 @@ static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie,
 static inline
 void fscache_note_page_release(struct fscache_cookie *cookie)
 {
+	/* If we've written data to the cache (HAVE_DATA) and there wasn't any
+	 * data in the cache when we started (NO_DATA_TO_READ), it may no
+	 * longer be true that we can skip reading from the cache - so clear
+	 * the flag that causes reads to be skipped.
+	 */
 	if (cookie &&
 	    test_bit(FSCACHE_COOKIE_HAVE_DATA, &cookie->flags) &&
 	    test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags))
-- 
cgit v1.2.3


From ffa65753c43142f3b803486442813744da71cff2 Mon Sep 17 00:00:00 2001
From: Alistair Popple <apopple@nvidia.com>
Date: Fri, 21 Jan 2022 22:10:46 -0800
Subject: mm/migrate.c: rework migration_entry_wait() to not take a pageref

This fixes the FIXME in migrate_vma_check_page().

Before migrating a page migration code will take a reference and check
there are no unexpected page references, failing the migration if there
are.  When a thread faults on a migration entry it will take a temporary
reference to the page to wait for the page to become unlocked signifying
the migration entry has been removed.

This reference is dropped just prior to waiting on the page lock,
however the extra reference can cause migration failures so it is
desirable to avoid taking it.

As migration code already has a reference to the migrating page an extra
reference to wait on PG_locked is unnecessary so long as the reference
can't be dropped whilst setting up the wait.

When faulting on a migration entry the ptl is taken to check the
migration entry.  Removing a migration entry also requires the ptl, and
migration code won't drop its page reference until after the migration
entry has been removed.  Therefore retaining the ptl of a migration
entry is sufficient to ensure the page has a reference.  Reworking
migration_entry_wait() to hold the ptl until the wait setup is complete
means the extra page reference is no longer needed.

[apopple@nvidia.com: v5]
  Link: https://lkml.kernel.org/r/20211213033848.1973946-1-apopple@nvidia.com

Link: https://lkml.kernel.org/r/20211118020754.954425-1-apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 4850cc5bf813..db96e10eb8da 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -40,6 +40,8 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 				  struct page *newpage, struct page *page);
 extern int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page, int extra_count);
+void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep,
+				spinlock_t *ptl);
 void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
 void folio_migrate_copy(struct folio *newfolio, struct folio *folio);
 int folio_migrate_mapping(struct address_space *mapping,
-- 
cgit v1.2.3


From 3ddd9a808cee7284931312f2f3e854c9617f44b2 Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Fri, 21 Jan 2022 22:10:50 -0800
Subject: sysctl: add a new register_sysctl_init() interface

Patch series "sysctl: first set of kernel/sysctl cleanups", v2.

Finally had time to respin the series of the work we had started last
year on cleaning up the kernel/sysct.c kitchen sink.  People keeps
stuffing their sysctls in that file and this creates a maintenance
burden.  So this effort is aimed at placing sysctls where they actually
belong.

I'm going to split patches up into series as there is quite a bit of
work.

This first set adds register_sysctl_init() for uses of registerting a
sysctl on the init path, adds const where missing to a few places,
generalizes common values so to be more easy to share, and starts the
move of a few kernel/sysctl.c out where they belong.

The majority of rework on v2 in this first patch set is 0-day fixes.
Eric Biederman's feedback is later addressed in subsequent patch sets.

I'll only post the first two patch sets for now.  We can address the
rest once the first two patch sets get completely reviewed / Acked.

This patch (of 9):

The kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
dishes, this makes it very difficult to maintain.

To help with this maintenance let's start by moving sysctls to places
where they actually belong.  The proc sysctl maintainers do not want to
know what sysctl knobs you wish to add for your own piece of code, we
just care about the core logic.

Today though folks heavily rely on tables on kernel/sysctl.c so they can
easily just extend this table with their needed sysctls.  In order to
help users move their sysctls out we need to provide a helper which can
be used during code initialization.

We special-case the initialization use of register_sysctl() since it
*is* safe to fail, given all that sysctls do is provide a dynamic
interface to query or modify at runtime an existing variable.  So the
use case of register_sysctl() on init should *not* stop if the sysctls
don't end up getting registered.  It would be counter productive to stop
boot if a simple sysctl registration failed.

Provide a helper for init then, and document the recommended init levels
to use for callers of this routine.  We will later use this in
subsequent patches to start slimming down kernel/sysctl.c tables and
moving sysctl registration to the code which actually needs these
sysctls.

[mcgrof@kernel.org: major commit log and documentation rephrasing also moved to fs/proc/proc_sysctl.c                  ]

Link: https://lkml.kernel.org/r/20211123202347.818157-1-mcgrof@kernel.org
Link: https://lkml.kernel.org/r/20211123202347.818157-2-mcgrof@kernel.org
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Paul Turner <pjt@google.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Sebastian Reichel <sre@kernel.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Qing Wang <wangqing@vivo.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Jan Kara <jack@suse.cz>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Phillip Potter <phil@philpotter.co.uk>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Cc: James E.J. Bottomley <jejb@linux.ibm.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sysctl.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 1fa2b69c6fc3..d3ab7969b6b5 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -199,6 +199,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
 void unregister_sysctl_table(struct ctl_table_header * table);
 
 extern int sysctl_init(void);
+extern void __register_sysctl_init(const char *path, struct ctl_table *table,
+				 const char *table_name);
+#define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table)
 void do_sysctl_args(void);
 
 extern int pwrsw_enabled;
-- 
cgit v1.2.3


From 78e36f3b0dae586f623c4a37ec5eb5496f5abbe1 Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Fri, 21 Jan 2022 22:10:55 -0800
Subject: sysctl: move some boundary constants from sysctl.c to sysctl_vals

sysctl has helpers which let us specify boundary values for a min or max
int value.  Since these are used for a boundary check only they don't
change, so move these variables to sysctl_vals to avoid adding duplicate
variables.  This will help with our cleanup of kernel/sysctl.c.

[akpm@linux-foundation.org: update it for "mm/pagealloc: sysctl: change watermark_scale_factor max limit to 30%"]
[mcgrof@kernel.org: major rebase]

Link: https://lkml.kernel.org/r/20211123202347.818157-3-mcgrof@kernel.org
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Qing Wang <wangqing@vivo.com>
Cc: Sebastian Reichel <sre@kernel.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Phillip Potter <phil@philpotter.co.uk>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Cc: James E.J. Bottomley <jejb@linux.ibm.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sysctl.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index d3ab7969b6b5..47cf70c8eb93 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -38,9 +38,16 @@ struct ctl_table_header;
 struct ctl_dir;
 
 /* Keep the same order as in fs/proc/proc_sysctl.c */
-#define SYSCTL_ZERO	((void *)&sysctl_vals[0])
-#define SYSCTL_ONE	((void *)&sysctl_vals[1])
-#define SYSCTL_INT_MAX	((void *)&sysctl_vals[2])
+#define SYSCTL_NEG_ONE			((void *)&sysctl_vals[0])
+#define SYSCTL_ZERO			((void *)&sysctl_vals[1])
+#define SYSCTL_ONE			((void *)&sysctl_vals[2])
+#define SYSCTL_TWO			((void *)&sysctl_vals[3])
+#define SYSCTL_FOUR			((void *)&sysctl_vals[4])
+#define SYSCTL_ONE_HUNDRED		((void *)&sysctl_vals[5])
+#define SYSCTL_TWO_HUNDRED		((void *)&sysctl_vals[6])
+#define SYSCTL_ONE_THOUSAND		((void *)&sysctl_vals[7])
+#define SYSCTL_THREE_THOUSAND		((void *)&sysctl_vals[8])
+#define SYSCTL_INT_MAX			((void *)&sysctl_vals[9])
 
 extern const int sysctl_vals[];
 
-- 
cgit v1.2.3


From bbe7a10ed83a5fa0b0ff6161ecdc4e65a0e9c993 Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Fri, 21 Jan 2022 22:11:00 -0800
Subject: hung_task: move hung_task sysctl interface to hung_task.c

The kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
dishes, this makes it very difficult to maintain.

To help with this maintenance let's start by moving sysctls to places
where they actually belong.  The proc sysctl maintainers do not want to
know what sysctl knobs you wish to add for your own piece of code, we
just care about the core logic.

So move hung_task sysctl interface to hung_task.c and use
register_sysctl() to register the sysctl interface.

[mcgrof@kernel.org: commit log refresh and fixed 2-3 0day reported compile issues]

Link: https://lkml.kernel.org/r/20211123202347.818157-4-mcgrof@kernel.org
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Qing Wang <wangqing@vivo.com>
Cc: Sebastian Reichel <sre@kernel.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Phillip Potter <phil@philpotter.co.uk>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Cc: James E.J. Bottomley <jejb@linux.ibm.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched/sysctl.h | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 304f431178fd..c19dd5a2c05c 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -7,20 +7,8 @@
 struct ctl_table;
 
 #ifdef CONFIG_DETECT_HUNG_TASK
-
-#ifdef CONFIG_SMP
-extern unsigned int sysctl_hung_task_all_cpu_backtrace;
-#else
-#define sysctl_hung_task_all_cpu_backtrace 0
-#endif /* CONFIG_SMP */
-
-extern int	     sysctl_hung_task_check_count;
-extern unsigned int  sysctl_hung_task_panic;
+/* used for hung_task and block/ */
 extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_check_interval_secs;
-extern int sysctl_hung_task_warnings;
-int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
-		void *buffer, size_t *lenp, loff_t *ppos);
 #else
 /* Avoid need for ifdefs elsewhere in the code */
 enum { sysctl_hung_task_timeout_secs = 0 };
-- 
cgit v1.2.3


From 86b12b6c5d6b46e64bf2e8080528781032e4bd90 Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Fri, 21 Jan 2022 22:11:24 -0800
Subject: aio: move aio sysctl to aio.c

The kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
dishes, this makes it very difficult to maintain.

To help with this maintenance let's start by moving sysctls to places
where they actually belong.  The proc sysctl maintainers do not want to
know what sysctl knobs you wish to add for your own piece of code, we
just care about the core logic.

Move aio sysctl to aio.c and use the new register_sysctl_init() to
register the sysctl interface for aio.

[mcgrof@kernel.org: adjust commit log to justify the move]

Link: https://lkml.kernel.org/r/20211123202347.818157-9-mcgrof@kernel.org
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Qing Wang <wangqing@vivo.com>
Cc: Sebastian Reichel <sre@kernel.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Phillip Potter <phil@philpotter.co.uk>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Cc: James E.J. Bottomley <jejb@linux.ibm.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/aio.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/aio.h b/include/linux/aio.h
index b83e68dd006f..86892a4fe7c8 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -20,8 +20,4 @@ static inline void kiocb_set_cancel_fn(struct kiocb *req,
 				       kiocb_cancel_fn *cancel) { }
 #endif /* CONFIG_AIO */
 
-/* for sysctl: */
-extern unsigned long aio_nr;
-extern unsigned long aio_max_nr;
-
 #endif /* __LINUX__AIO_H */
-- 
cgit v1.2.3


From 49a4de75719b6c0f1f375df9908a95cef1e34945 Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Fri, 21 Jan 2022 22:11:29 -0800
Subject: dnotify: move dnotify sysctl to dnotify.c

The kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
dishes, this makes it very difficult to maintain.

To help with this maintenance let's start by moving sysctls to places
where they actually belong.  The proc sysctl maintainers do not want to
know what sysctl knobs you wish to add for your own piece of code, we
just care about the core logic.

So move dnotify sysctls to dnotify.c and use the new
register_sysctl_init() to register the sysctl interface.

[mcgrof@kernel.org: adjust the commit log to justify the move]

Link: https://lkml.kernel.org/r/20211123202347.818157-10-mcgrof@kernel.org
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Acked-by: Jan Kara <jack@suse.cz>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Qing Wang <wangqing@vivo.com>
Cc: Sebastian Reichel <sre@kernel.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Phillip Potter <phil@philpotter.co.uk>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Cc: James E.J. Bottomley <jejb@linux.ibm.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dnotify.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h
index b87c3b85a166..b1d26f9f1c9f 100644
--- a/include/linux/dnotify.h
+++ b/include/linux/dnotify.h
@@ -29,7 +29,6 @@ struct dnotify_struct {
 			    FS_CREATE | FS_RENAME |\
 			    FS_MOVED_FROM | FS_MOVED_TO)
 
-extern int dir_notify_enable;
 extern void dnotify_flush(struct file *, fl_owner_t);
 extern int fcntl_dirnotify(int, struct file *, unsigned long);
 
-- 
cgit v1.2.3


From 7b9ad122b52c9839e1f68f16c907990a6ad6f793 Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Fri, 21 Jan 2022 22:11:59 -0800
Subject: inotify: simplify subdirectory registration with register_sysctl()

There is no need to user boiler plate code to specify a set of base
directories we're going to stuff sysctls under.  Simplify this by using
register_sysctl() and specifying the directory path directly.

Move inotify_user sysctl to inotify_user.c while at it to remove clutter
from kernel/sysctl.c.

[mcgrof@kernel.org: remember to register fanotify_table]
  Link: https://lkml.kernel.org/r/YZ5A6iWLb0h3N3RC@bombadil.infradead.org
[mcgrof@kernel.org: update commit log to reflect new path we decided to take]

Link: https://lkml.kernel.org/r/20211123202422.819032-7-mcgrof@kernel.org
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Airlie <airlied@linux.ie>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Phillip Potter <phil@philpotter.co.uk>
Cc: Qing Wang <wangqing@vivo.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Sebastian Reichel <sre@kernel.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Cc: James E.J. Bottomley <jejb@linux.ibm.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fanotify.h | 2 --
 include/linux/inotify.h  | 3 ---
 2 files changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 3afdf339d53c..419cadcd7ff5 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -5,8 +5,6 @@
 #include <linux/sysctl.h>
 #include <uapi/linux/fanotify.h>
 
-extern struct ctl_table fanotify_table[]; /* for sysctl */
-
 #define FAN_GROUP_FLAG(group, flag) \
 	((group)->fanotify_data.flags & (flag))
 
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index 6a24905f6e1e..8d20caa1b268 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -7,11 +7,8 @@
 #ifndef _LINUX_INOTIFY_H
 #define _LINUX_INOTIFY_H
 
-#include <linux/sysctl.h>
 #include <uapi/linux/inotify.h>
 
-extern struct ctl_table inotify_table[]; /* for sysctl */
-
 #define ALL_INOTIFY_BITS (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \
 			  IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | \
 			  IN_MOVED_TO | IN_CREATE | IN_DELETE | \
-- 
cgit v1.2.3


From a8f5de894f76f1c73f4a068d04897a5e2f873825 Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Fri, 21 Jan 2022 22:12:09 -0800
Subject: eventpoll: simplify sysctl declaration with register_sysctl()

The kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
dishes, this makes it very difficult to maintain.

To help with this maintenance let's start by moving sysctls to places
where they actually belong.  The proc sysctl maintainers do not want to
know what sysctl knobs you wish to add for your own piece of code, we
just care about the core logic.

So move the epoll_table sysctl to fs/eventpoll.c and use
register_sysctl().

Link: https://lkml.kernel.org/r/20211123202422.819032-9-mcgrof@kernel.org
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Airlie <airlied@linux.ie>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Phillip Potter <phil@philpotter.co.uk>
Cc: Qing Wang <wangqing@vivo.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Sebastian Reichel <sre@kernel.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Cc: James E.J. Bottomley <jejb@linux.ibm.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/poll.h   | 2 --
 include/linux/sysctl.h | 1 -
 2 files changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/poll.h b/include/linux/poll.h
index 1cdc32b1f1b0..a9e0e1c2d1f2 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -8,12 +8,10 @@
 #include <linux/wait.h>
 #include <linux/string.h>
 #include <linux/fs.h>
-#include <linux/sysctl.h>
 #include <linux/uaccess.h>
 #include <uapi/linux/poll.h>
 #include <uapi/linux/eventpoll.h>
 
-extern struct ctl_table epoll_table[]; /* for sysctl */
 /* ~832 bytes of stack space used max in sys_select/sys_poll before allocating
    additional memory. */
 #ifdef __clang__
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 47cf70c8eb93..6dd0f277f844 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -219,7 +219,6 @@ extern int no_unaligned_warning;
 extern struct ctl_table sysctl_mount_point[];
 extern struct ctl_table random_table[];
 extern struct ctl_table firmware_config_table[];
-extern struct ctl_table epoll_table[];
 
 #else /* CONFIG_SYSCTL */
 static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
-- 
cgit v1.2.3


From 6aad36d421d8bfe156508fa4edfe67827234cf0f Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Fri, 21 Jan 2022 22:12:13 -0800
Subject: firmware_loader: move firmware sysctl to its own files

Patch series "sysctl: 3rd set of kernel/sysctl cleanups", v2.

This is the third set of patches to help address cleaning the kitchen
seink in kernel/sysctl.c and to move sysctls away to where they are
actually implemented / used.

This patch (of 8):

kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
dishes, this makes it very difficult to maintain.

To help with this maintenance let's start by moving sysctls to places
where they actually belong.  The proc sysctl maintainers do not want to
know what sysctl knobs you wish to add for your own piece of code, we
just care about the core logic.

So move the firmware configuration sysctl table to the only place where
it is used, and make it clear that if sysctls are disabled this is not
used.

[akpm@linux-foundation.org: export register_firmware_config_sysctl and unregister_firmware_config_sysctl to modules]
[akpm@linux-foundation.org: use EXPORT_SYMBOL_NS_GPL instead]
[sfr@canb.auug.org.au: fix that so it compiles]
  Link: https://lkml.kernel.org/r/20211201160626.401d828d@canb.auug.org.au
[mcgrof@kernel.org: major commit log update to justify the move]

Link: https://lkml.kernel.org/r/20211124231435.1445213-1-mcgrof@kernel.org
Link: https://lkml.kernel.org/r/20211124231435.1445213-2-mcgrof@kernel.org
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Kees Cook <keescook@chromium.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Cc: James E.J. Bottomley <jejb@linux.ibm.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Phillip Potter <phil@philpotter.co.uk>
Cc: Qing Wang <wangqing@vivo.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Sebastian Reichel <sre@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sysctl.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 6dd0f277f844..3985e9c80155 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -218,7 +218,6 @@ extern int no_unaligned_warning;
 
 extern struct ctl_table sysctl_mount_point[];
 extern struct ctl_table random_table[];
-extern struct ctl_table firmware_config_table[];
 
 #else /* CONFIG_SYSCTL */
 static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
-- 
cgit v1.2.3


From 5475e8f03c80bbce7b43a57d861f5acc44a60b22 Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Fri, 21 Jan 2022 22:12:18 -0800
Subject: random: move the random sysctl declarations to its own file

kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
dishes, this makes it very difficult to maintain.

To help with this maintenance let's start by moving sysctls to places
where they actually belong.  The proc sysctl maintainers do not want to
know what sysctl knobs you wish to add for your own piece of code, we
just care about the core logic.

So move the random sysctls to their own file and use
register_sysctl_init().

[mcgrof@kernel.org: commit log update to justify the move]

Link: https://lkml.kernel.org/r/20211124231435.1445213-3-mcgrof@kernel.org
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: James E.J. Bottomley <jejb@linux.ibm.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Phillip Potter <phil@philpotter.co.uk>
Cc: Qing Wang <wangqing@vivo.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Sebastian Reichel <sre@kernel.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sysctl.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 3985e9c80155..fce05a060bc5 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -217,7 +217,6 @@ extern int unaligned_dump_stack;
 extern int no_unaligned_warning;
 
 extern struct ctl_table sysctl_mount_point[];
-extern struct ctl_table random_table[];
 
 #else /* CONFIG_SYSCTL */
 static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
-- 
cgit v1.2.3


From ee9efac48a082904d17a20131aa73d82f058cdd6 Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Fri, 21 Jan 2022 22:12:23 -0800
Subject: sysctl: add helper to register a sysctl mount point

The way to create a subdirectory on top of sysctl_mount_point is a bit
obscure, and *why* we do that even so more.  Provide a helper which
makes it clear why we do this.

[akpm@linux-foundation.org: export register_sysctl_mount_point() to
modules]

Link: https://lkml.kernel.org/r/20211124231435.1445213-4-mcgrof@kernel.org
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Suggested-by: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: James E.J. Bottomley <jejb@linux.ibm.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Phillip Potter <phil@philpotter.co.uk>
Cc: Qing Wang <wangqing@vivo.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Sebastian Reichel <sre@kernel.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sysctl.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index fce05a060bc5..746c098a6ff5 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -209,6 +209,8 @@ extern int sysctl_init(void);
 extern void __register_sysctl_init(const char *path, struct ctl_table *table,
 				 const char *table_name);
 #define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table)
+extern struct ctl_table_header *register_sysctl_mount_point(const char *path);
+
 void do_sysctl_args(void);
 
 extern int pwrsw_enabled;
@@ -224,6 +226,11 @@ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table *
 	return NULL;
 }
 
+static inline struct sysctl_header *register_sysctl_mount_point(const char *path)
+{
+	return NULL;
+}
+
 static inline struct ctl_table_header *register_sysctl_paths(
 			const struct ctl_path *path, struct ctl_table *table)
 {
-- 
cgit v1.2.3


From 26d1c80fd61e59d5e2eb6fda00e0148a6704ddeb Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Fri, 21 Jan 2022 22:12:38 -0800
Subject: scsi/sg: move sg-big-buff sysctl to scsi/sg.c

kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
dishes, this makes it very difficult to maintain.

To help with this maintenance let's start by moving sysctls to places
where they actually belong.  The proc sysctl maintainers do not want to
know what sysctl knobs you wish to add for your own piece of code, we
just care about the core logic.

So move the sg-big-buff sysctl from kernel/sysctl.c to drivers/scsi/sg.c
and use register_sysctl() to register the sysctl interface.

[mcgrof@kernel.org: commit log update]

Link: https://lkml.kernel.org/r/20211124231435.1445213-7-mcgrof@kernel.org
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: James E.J. Bottomley <jejb@linux.ibm.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Phillip Potter <phil@philpotter.co.uk>
Cc: Qing Wang <wangqing@vivo.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Sebastian Reichel <sre@kernel.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/scsi/sg.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/scsi/sg.h b/include/scsi/sg.h
index 843cefb8efce..068e35d36557 100644
--- a/include/scsi/sg.h
+++ b/include/scsi/sg.h
@@ -29,10 +29,6 @@
  * For utility and test programs see: http://sg.danny.cz/sg/sg3_utils.html
  */
 
-#ifdef __KERNEL__
-extern int sg_big_buff; /* for sysctl */
-#endif
-
 
 typedef struct sg_iovec /* same structure as used by readv() Linux system */
 {                       /* call. It defines one scatter-gather element. */
-- 
cgit v1.2.3


From 0df8bdd5e3b3e557ce2c2575fce0c64c5dd1045a Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Fri, 21 Jan 2022 22:12:43 -0800
Subject: stackleak: move stack_erasing sysctl to stackleak.c

kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
dishes, this makes it very difficult to maintain.

To help with this maintenance let's start by moving sysctls to places
where they actually belong.  The proc sysctl maintainers do not want to
know what sysctl knobs you wish to add for your own piece of code, we
just care about the core logic.

So move the stack_erasing sysctl from kernel/sysctl.c to
kernel/stackleak.c and use register_sysctl() to register the sysctl
interface.

[mcgrof@kernel.org: commit log update]

Link: https://lkml.kernel.org/r/20211124231435.1445213-8-mcgrof@kernel.org
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: James E.J. Bottomley <jejb@linux.ibm.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Phillip Potter <phil@philpotter.co.uk>
Cc: Qing Wang <wangqing@vivo.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Sebastian Reichel <sre@kernel.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/stackleak.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h
index a59db2f08e76..ccaab2043fcd 100644
--- a/include/linux/stackleak.h
+++ b/include/linux/stackleak.h
@@ -23,11 +23,6 @@ static inline void stackleak_task_init(struct task_struct *t)
 # endif
 }
 
-#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
-int stack_erasing_sysctl(struct ctl_table *table, int write,
-			void *buffer, size_t *lenp, loff_t *ppos);
-#endif
-
 #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
 static inline void stackleak_task_init(struct task_struct *t) { }
 #endif
-- 
cgit v1.2.3


From b1f2aff888af54a057c2c3c0d88a13ef5d37b52a Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Fri, 21 Jan 2022 22:12:48 -0800
Subject: sysctl: share unsigned long const values

Provide a way to share unsigned long values.  This will allow others to
not have to re-invent these values.

Link: https://lkml.kernel.org/r/20211124231435.1445213-9-mcgrof@kernel.org
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: James E.J. Bottomley <jejb@linux.ibm.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Phillip Potter <phil@philpotter.co.uk>
Cc: Qing Wang <wangqing@vivo.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Sebastian Reichel <sre@kernel.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sysctl.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 746c098a6ff5..2de6d20d191b 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -51,6 +51,12 @@ struct ctl_dir;
 
 extern const int sysctl_vals[];
 
+#define SYSCTL_LONG_ZERO	((void *)&sysctl_long_vals[0])
+#define SYSCTL_LONG_ONE		((void *)&sysctl_long_vals[1])
+#define SYSCTL_LONG_MAX		((void *)&sysctl_long_vals[2])
+
+extern const unsigned long sysctl_long_vals[];
+
 typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer,
 		size_t *lenp, loff_t *ppos);
 
-- 
cgit v1.2.3


From 1d67fe585049d3e2448b997af78c68cbf90ada09 Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Fri, 21 Jan 2022 22:12:52 -0800
Subject: fs: move inode sysctls to its own file

Patch series "sysctl: 4th set of kernel/sysctl cleanups".

This is slimming down the fs uses of kernel/sysctl.c to the point that
the next step is to just get rid of the fs base directory for it and
move that elsehwere, so that next patch series starts dealing with that
to demo how we can end up cleaning up a full base directory from
kernel/sysctl.c, one at a time.

This patch (of 9):

kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
dishes, this makes it very difficult to maintain.

To help with this maintenance let's start by moving sysctls to places
where they actually belong.  The proc sysctl maintainers do not want to
know what sysctl knobs you wish to add for your own piece of code, we
just care about the core logic.

So move the inode sysctls to its own file.  Since we are no longer using
this outside of fs/ remove the extern declaration of its respective proc
helper.

We use early_initcall() as it is the earliest we can use.

[arnd@arndb.de: avoid unused-variable warning]
  Link: https://lkml.kernel.org/r/20211203190123.874239-1-arnd@kernel.org

Link: https://lkml.kernel.org/r/20211129205548.605569-1-mcgrof@kernel.org
Link: https://lkml.kernel.org/r/20211129205548.605569-2-mcgrof@kernel.org
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Kees Cook <keescook@chromium.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: Xiaoming Ni <nixiaoming@huawei.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index c8510da6cc6d..044de67c8167 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -82,7 +82,6 @@ extern void __init files_maxfiles_init(void);
 extern struct files_stat_struct files_stat;
 extern unsigned long get_max_files(void);
 extern unsigned int sysctl_nr_open;
-extern struct inodes_stat_t inodes_stat;
 extern int leases_enable, lease_break_time;
 extern int sysctl_protected_symlinks;
 extern int sysctl_protected_hardlinks;
@@ -3537,8 +3536,6 @@ int proc_nr_files(struct ctl_table *table, int write,
 		  void *buffer, size_t *lenp, loff_t *ppos);
 int proc_nr_dentry(struct ctl_table *table, int write,
 		  void *buffer, size_t *lenp, loff_t *ppos);
-int proc_nr_inodes(struct ctl_table *table, int write,
-		   void *buffer, size_t *lenp, loff_t *ppos);
 int __init list_bdev_fs_names(char *buf, size_t size);
 
 #define __FMODE_EXEC		((__force int) FMODE_EXEC)
-- 
cgit v1.2.3


From 204d5a24e15562b2816825c0f9b49d26814b77be Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Fri, 21 Jan 2022 22:12:56 -0800
Subject: fs: move fs stat sysctls to file_table.c

kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
dishes, this makes it very difficult to maintain.

To help with this maintenance let's start by moving sysctls to places
where they actually belong.  The proc sysctl maintainers do not want to
know what sysctl knobs you wish to add for your own piece of code, we
just care about the core logic.

We can create the sysctl dynamically on early init for fs stat to help
with this clutter.  This dusts off the fs stat syctls knobs and puts
them into where they are declared.

Link: https://lkml.kernel.org/r/20211129205548.605569-3-mcgrof@kernel.org
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 044de67c8167..1e6761966120 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -79,7 +79,6 @@ extern void __init inode_init_early(void);
 extern void __init files_init(void);
 extern void __init files_maxfiles_init(void);
 
-extern struct files_stat_struct files_stat;
 extern unsigned long get_max_files(void);
 extern unsigned int sysctl_nr_open;
 extern int leases_enable, lease_break_time;
@@ -3532,8 +3531,6 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
 			  size_t len, loff_t *ppos);
 
 struct ctl_table;
-int proc_nr_files(struct ctl_table *table, int write,
-		  void *buffer, size_t *lenp, loff_t *ppos);
 int proc_nr_dentry(struct ctl_table *table, int write,
 		  void *buffer, size_t *lenp, loff_t *ppos);
 int __init list_bdev_fs_names(char *buf, size_t size);
-- 
cgit v1.2.3


From c8c0c239d5ab1e3e8d2bb0453ce642fe2c6357ec Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Fri, 21 Jan 2022 22:12:59 -0800
Subject: fs: move dcache sysctls to its own file

kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
dishes, this makes it very difficult to maintain.

To help with this maintenance let's start by moving sysctls to places
where they actually belong.  The proc sysctl maintainers do not want to
know what sysctl knobs you wish to add for your own piece of code, we
just care about the core logic.

So move the dcache sysctl clutter out of kernel/sysctl.c.  This is a
small one-off entry, perhaps later we can simplify this representation,
but for now we use the helpers we have.  We won't know how we can
simplify this further untl we're fully done with the cleanup.

[arnd@arndb.de: avoid unused-function warning]
  Link: https://lkml.kernel.org/r/20211203190123.874239-2-arnd@kernel.org

Link: https://lkml.kernel.org/r/20211129205548.605569-4-mcgrof@kernel.org
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dcache.h | 10 ----------
 include/linux/fs.h     |  2 --
 2 files changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 9e23d33bb6f1..f5bba51480b2 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -61,16 +61,6 @@ extern const struct qstr empty_name;
 extern const struct qstr slash_name;
 extern const struct qstr dotdot_name;
 
-struct dentry_stat_t {
-	long nr_dentry;
-	long nr_unused;
-	long age_limit;		/* age in seconds */
-	long want_pages;	/* pages requested by system */
-	long nr_negative;	/* # of unused negative dentries */
-	long dummy;		/* Reserved for future use */
-};
-extern struct dentry_stat_t dentry_stat;
-
 /*
  * Try to keep struct dentry aligned on 64 byte cachelines (this will
  * give reasonable cacheline footprint with larger lines without the
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1e6761966120..9b856d5da9e2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3531,8 +3531,6 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
 			  size_t len, loff_t *ppos);
 
 struct ctl_table;
-int proc_nr_dentry(struct ctl_table *table, int write,
-		  void *buffer, size_t *lenp, loff_t *ppos);
 int __init list_bdev_fs_names(char *buf, size_t size);
 
 #define __FMODE_EXEC		((__force int) FMODE_EXEC)
-- 
cgit v1.2.3


From 54771613e8a7dbbba2a205ddf1b33e25a290b3fd Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Fri, 21 Jan 2022 22:13:03 -0800
Subject: sysctl: move maxolduid as a sysctl specific const
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The maxolduid value is only shared for sysctl purposes for use on a max
range.  Just stuff this into our shared const array.

[akpm@linux-foundation.org: fix sysctl_vals[], per Mickaël]

Link: https://lkml.kernel.org/r/20211129205548.605569-5-mcgrof@kernel.org
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sysctl.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 2de6d20d191b..bb921eb8a02d 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -49,6 +49,9 @@ struct ctl_dir;
 #define SYSCTL_THREE_THOUSAND		((void *)&sysctl_vals[8])
 #define SYSCTL_INT_MAX			((void *)&sysctl_vals[9])
 
+/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
+#define SYSCTL_MAXOLDUID		((void *)&sysctl_vals[10])
+
 extern const int sysctl_vals[];
 
 #define SYSCTL_LONG_ZERO	((void *)&sysctl_long_vals[0])
-- 
cgit v1.2.3


From dd81faa88340a1fe8cd81c8ecbadd8e95c58549c Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Fri, 21 Jan 2022 22:13:10 -0800
Subject: fs: move locking sysctls where they are used

kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
dishes, this makes it very difficult to maintain.

To help with this maintenance let's start by moving sysctls to places
where they actually belong.  The proc sysctl maintainers do not want to
know what sysctl knobs you wish to add for your own piece of code, we
just care about the core logic.

The locking fs sysctls are only used on fs/locks.c, so move them there.

Link: https://lkml.kernel.org/r/20211129205548.605569-7-mcgrof@kernel.org
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9b856d5da9e2..0e08c3dd8f75 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -82,10 +82,6 @@ extern void __init files_maxfiles_init(void);
 extern unsigned long get_max_files(void);
 extern unsigned int sysctl_nr_open;
 extern int leases_enable, lease_break_time;
-extern int sysctl_protected_symlinks;
-extern int sysctl_protected_hardlinks;
-extern int sysctl_protected_fifos;
-extern int sysctl_protected_regular;
 
 typedef __kernel_rwf_t rwf_t;
 
-- 
cgit v1.2.3


From 9c011be132972ff94bde2ae99064e29f94e85c68 Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Fri, 21 Jan 2022 22:13:13 -0800
Subject: fs: move namei sysctls to its own file

kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
dishes, this makes it very difficult to maintain.

To help with this maintenance let's start by moving sysctls to places
where they actually belong.  The proc sysctl maintainers do not want to
know what sysctl knobs you wish to add for your own piece of code, we
just care about the core logic.

So move namei's own sysctl knobs to its own file.

Other than the move we also avoid initializing two static variables to 0
as this is not needed:

  * sysctl_protected_symlinks
  * sysctl_protected_hardlinks

Link: https://lkml.kernel.org/r/20211129205548.605569-8-mcgrof@kernel.org
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0e08c3dd8f75..9617dea24978 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -81,7 +81,6 @@ extern void __init files_maxfiles_init(void);
 
 extern unsigned long get_max_files(void);
 extern unsigned int sysctl_nr_open;
-extern int leases_enable, lease_break_time;
 
 typedef __kernel_rwf_t rwf_t;
 
-- 
cgit v1.2.3


From 1998f19324d24df7de4e74d81503b4299eb99e7d Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Fri, 21 Jan 2022 22:13:20 -0800
Subject: fs: move pipe sysctls to is own file

kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
dishes, this makes it very difficult to maintain.

To help with this maintenance let's start by moving sysctls to places
where they actually belong.  The proc sysctl maintainers do not want to
know what sysctl knobs you wish to add for your own piece of code, we
just care about the core logic.

So move the pipe sysctls to its own file.

Link: https://lkml.kernel.org/r/20211129205548.605569-10-mcgrof@kernel.org
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pipe_fs_i.h | 4 ----
 include/linux/sysctl.h    | 6 ++++++
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index fc5642431b92..c00c618ef290 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -238,10 +238,6 @@ void pipe_lock(struct pipe_inode_info *);
 void pipe_unlock(struct pipe_inode_info *);
 void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
 
-extern unsigned int pipe_max_size;
-extern unsigned long pipe_user_pages_hard;
-extern unsigned long pipe_user_pages_soft;
-
 /* Wait for a pipe to be readable/writable while dropping the pipe lock */
 void pipe_wait_readable(struct pipe_inode_info *);
 void pipe_wait_writable(struct pipe_inode_info *);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index bb921eb8a02d..4294e9668bd5 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -221,6 +221,12 @@ extern void __register_sysctl_init(const char *path, struct ctl_table *table,
 extern struct ctl_table_header *register_sysctl_mount_point(const char *path);
 
 void do_sysctl_args(void);
+int do_proc_douintvec(struct ctl_table *table, int write,
+		      void *buffer, size_t *lenp, loff_t *ppos,
+		      int (*conv)(unsigned long *lvalp,
+				  unsigned int *valp,
+				  int write, void *data),
+		      void *data);
 
 extern int pwrsw_enabled;
 extern int unaligned_enabled;
-- 
cgit v1.2.3


From 51cb8dfc5a5c39e6c70376b9dc9a14d624a9d271 Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Fri, 21 Jan 2022 22:13:24 -0800
Subject: sysctl: add and use base directory declarer and registration helper

Patch series "sysctl: add and use base directory declarer and
registration helper".

In this patch series we start addressing base directories, and so we
start with the "fs" sysctls.  The end goal is we end up completely
moving all "fs" sysctl knobs out from kernel/sysctl.

This patch (of 6):

Add a set of helpers which can be used to declare and register base
directory sysctls on their own.  We do this so we can later move each of
the base sysctl directories like "fs", "kernel", etc, to their own
respective files instead of shoving the declarations and registrations
all on kernel/sysctl.c.  The lazy approach has caught up and with this,
we just end up extending the list of base directories / sysctls on one
file and this makes maintenance difficult due to merge conflicts from
many developers.

The declarations are used first by kernel/sysctl.c for registration its
own base which over time we'll try to clean up.  It will be used in the
next patch to demonstrate how to cleanly deal with base sysctl
directories.

[mcgrof@kernel.org: null-terminate the ctl_table arrays]
  Link: https://lkml.kernel.org/r/YafJY3rXDYnjK/gs@bombadil.infradead.org

Link: https://lkml.kernel.org/r/20211129211943.640266-1-mcgrof@kernel.org
Link: https://lkml.kernel.org/r/20211129211943.640266-2-mcgrof@kernel.org
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Kees Cook <keescook@chromium.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: Xiaoming Ni <nixiaoming@huawei.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: "Naveen N. Rao" <naveen.n.rao@linux.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sysctl.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 4294e9668bd5..02134a8abad7 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -194,6 +194,20 @@ struct ctl_path {
 
 #ifdef CONFIG_SYSCTL
 
+#define DECLARE_SYSCTL_BASE(_name, _table)				\
+static struct ctl_table _name##_base_table[] = {			\
+	{								\
+		.procname	= #_name,				\
+		.mode		= 0555,					\
+		.child		= _table,				\
+	},								\
+	{ },								\
+}
+
+extern int __register_sysctl_base(struct ctl_table *base_table);
+
+#define register_sysctl_base(_name) __register_sysctl_base(_name##_base_table)
+
 void proc_sys_poll_notify(struct ctl_table_poll *poll);
 
 extern void setup_sysctl_set(struct ctl_table_set *p,
@@ -236,6 +250,16 @@ extern int no_unaligned_warning;
 extern struct ctl_table sysctl_mount_point[];
 
 #else /* CONFIG_SYSCTL */
+
+#define DECLARE_SYSCTL_BASE(_name, _table)
+
+static inline int __register_sysctl_base(struct ctl_table *base_table)
+{
+	return 0;
+}
+
+#define register_sysctl_base(table) __register_sysctl_base(table)
+
 static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
 {
 	return NULL;
-- 
cgit v1.2.3


From ab171b952c6e065779687b44041038efdadb3915 Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Fri, 21 Jan 2022 22:13:27 -0800
Subject: fs: move namespace sysctls and declare fs base directory

This moves the namespace sysctls to its own file as part of the
kernel/sysctl.c spring cleaning

Since we have now removed all sysctls for "fs", we now have to declare
it on the filesystem code, we do that using the new helper, which
reduces boiler plate code.

We rename init_fs_shared_sysctls() to init_fs_sysctls() to reflect that
now fs/sysctls.c is taking on the burden of being the first to register
the base directory as well.

Lastly, since init code will load in the order in which we link it we
have to move the sysctl code to be linked in early, so that its early
init routine runs prior to other fs code.  This way, other filesystem
code can register their own sysctls using the helpers after this:

  * register_sysctl_init()
  * register_sysctl()

Link: https://lkml.kernel.org/r/20211129211943.640266-3-mcgrof@kernel.org
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: "Naveen N. Rao" <naveen.n.rao@linux.ibm.com>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mount.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mount.h b/include/linux/mount.h
index 5d92a7e1a742..7f18a7555dff 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -113,9 +113,6 @@ extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
 extern void mark_mounts_for_expiry(struct list_head *mounts);
 
 extern dev_t name_to_dev_t(const char *name);
-
-extern unsigned int sysctl_mount_max;
-
 extern bool path_is_mountpoint(const struct path *path);
 
 extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num);
-- 
cgit v1.2.3


From d8c0418aac78e661b5283c9d6a1dfc61d44f26fd Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Fri, 21 Jan 2022 22:13:31 -0800
Subject: kernel/sysctl.c: rename sysctl_init() to sysctl_init_bases()

Rename sysctl_init() to sysctl_init_bases() so to reflect exactly what
this is doing.

Link: https://lkml.kernel.org/r/20211129211943.640266-4-mcgrof@kernel.org
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: "Naveen N. Rao" <naveen.n.rao@linux.ibm.com>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sysctl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 02134a8abad7..180adf7da785 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -228,7 +228,7 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
 
 void unregister_sysctl_table(struct ctl_table_header * table);
 
-extern int sysctl_init(void);
+extern int sysctl_init_bases(void);
 extern void __register_sysctl_init(const char *path, struct ctl_table *table,
 				 const char *table_name);
 #define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table)
-- 
cgit v1.2.3


From fdcd4073fccc6f989308be3f1d61d8a68cd990ce Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Fri, 21 Jan 2022 22:13:34 -0800
Subject: printk: fix build warning when CONFIG_PRINTK=n

build warning when CONFIG_PRINTK=n

	kernel/printk/printk.c:175:5: warning: no previous prototype for
	 'devkmsg_sysctl_set_loglvl' [-Wmissing-prototypes]

devkmsg_sysctl_set_loglvl() is only used in sysctl.c when
CONFIG_PRINTK=y, but it participates in the build when CONFIG_PRINTK=n.
So add compile dependency CONFIG_PRINTK=y && CONFIG_SYSCTL=y to fix the
build warning.

Link: https://lkml.kernel.org/r/20211129211943.640266-5-mcgrof@kernel.org
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: "Naveen N. Rao" <naveen.n.rao@linux.ibm.com>
Cc: Stephen Kitt <steve@sk2.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/printk.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 9497f6b98339..1522df223c0f 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -183,10 +183,6 @@ extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 extern int printk_delay_msec;
 extern int dmesg_restrict;
 
-extern int
-devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void *buf,
-			  size_t *lenp, loff_t *ppos);
-
 extern void wake_up_klogd(void);
 
 char *log_buf_addr_get(void);
-- 
cgit v1.2.3


From f0bc21b268c1464603192a00851cdbbf7c2cdc36 Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Fri, 21 Jan 2022 22:13:38 -0800
Subject: fs/coredump: move coredump sysctls into its own file

This moves the fs/coredump.c respective sysctls to its own file.

Link: https://lkml.kernel.org/r/20211129211943.640266-6-mcgrof@kernel.org
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: "Naveen N. Rao" <naveen.n.rao@linux.ibm.com>
Cc: Stephen Kitt <steve@sk2.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/coredump.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 78fcd776b185..248a68c668b4 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -14,10 +14,6 @@ struct core_vma_metadata {
 	unsigned long dump_size;
 };
 
-extern int core_uses_pid;
-extern char core_pattern[];
-extern unsigned int core_pipe_limit;
-
 /*
  * These are the only things you should do on a core-file: use only these
  * functions to write out all the necessary info.
@@ -37,4 +33,10 @@ extern void do_coredump(const kernel_siginfo_t *siginfo);
 static inline void do_coredump(const kernel_siginfo_t *siginfo) {}
 #endif
 
+#if defined(CONFIG_COREDUMP) && defined(CONFIG_SYSCTL)
+extern void validate_coredump_safety(void);
+#else
+static inline void validate_coredump_safety(void) {}
+#endif
+
 #endif /* _LINUX_COREDUMP_H */
-- 
cgit v1.2.3


From a737a3c6744bc822d1e6a837fef550e665ddf877 Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Fri, 21 Jan 2022 22:13:41 -0800
Subject: kprobe: move sysctl_kprobes_optimization to kprobes.c

kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
dishes, this makes it very difficult to maintain.

To help with this maintenance let's start by moving sysctls to places
where they actually belong.  The proc sysctl maintainers do not want to
know what sysctl knobs you wish to add for your own piece of code, we
just care about the core logic.

Move sysctl_kprobes_optimization from kernel/sysctl.c to
kernel/kprobes.c.  Use register_sysctl() to register the sysctl
interface.

[mcgrof@kernel.org: fix compile issue when CONFIG_OPTPROBES is disabled]

Link: https://lkml.kernel.org/r/20211129211943.640266-7-mcgrof@kernel.org
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Antti Palosaari <crope@iki.fi>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lukas Middendorf <kernel@tuxforce.de>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: "Naveen N. Rao" <naveen.n.rao@linux.ibm.com>
Cc: Stephen Kitt <steve@sk2.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kprobes.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 8c8f7a4d93af..19b884353b15 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -348,12 +348,6 @@ extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs);
 
 DEFINE_INSN_CACHE_OPS(optinsn);
 
-#ifdef CONFIG_SYSCTL
-extern int sysctl_kprobes_optimization;
-extern int proc_kprobes_optimization_handler(struct ctl_table *table,
-					     int write, void *buffer,
-					     size_t *length, loff_t *ppos);
-#endif /* CONFIG_SYSCTL */
 extern void wait_for_kprobe_optimizer(void);
 #else /* !CONFIG_OPTPROBES */
 static inline void wait_for_kprobe_optimizer(void) { }
-- 
cgit v1.2.3


From 4a57d6bbaecd28c8175dc5da013009e4158018c2 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Fri, 21 Jan 2022 22:14:10 -0800
Subject: locking/rwlocks: introduce write_lock_nested

In preparation for converting bit_spin_lock to rwlock in zsmalloc so
that multiple writers of zspages can run at the same time but those
zspages are supposed to be different zspage instance.  Thus, it's not
deadlock.  This patch adds write_lock_nested to support the case for
LOCKDEP.

[minchan@kernel.org: fix write_lock_nested for RT]
  Link: https://lkml.kernel.org/r/YZfrMTAXV56HFWJY@google.com
[bigeasy@linutronix.de: fixup write_lock_nested() implementation]
  Link: https://lkml.kernel.org/r/20211123170134.y6xb7pmpgdn4m3bn@linutronix.de

Link: https://lkml.kernel.org/r/20211115185909.3949505-8-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rwlock.h          |  6 ++++++
 include/linux/rwlock_api_smp.h  |  8 ++++++++
 include/linux/rwlock_rt.h       | 10 ++++++++++
 include/linux/spinlock_api_up.h |  1 +
 4 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index 2c0ad417ce3c..8f416c5e929e 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -55,6 +55,12 @@ do {								\
 #define write_lock(lock)	_raw_write_lock(lock)
 #define read_lock(lock)		_raw_read_lock(lock)
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define write_lock_nested(lock, subclass)	_raw_write_lock_nested(lock, subclass)
+#else
+#define write_lock_nested(lock, subclass)	_raw_write_lock(lock)
+#endif
+
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 
 #define read_lock_irqsave(lock, flags)			\
diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h
index f1db6f17c4fb..dceb0a59b692 100644
--- a/include/linux/rwlock_api_smp.h
+++ b/include/linux/rwlock_api_smp.h
@@ -17,6 +17,7 @@
 
 void __lockfunc _raw_read_lock(rwlock_t *lock)		__acquires(lock);
 void __lockfunc _raw_write_lock(rwlock_t *lock)		__acquires(lock);
+void __lockfunc _raw_write_lock_nested(rwlock_t *lock, int subclass)	__acquires(lock);
 void __lockfunc _raw_read_lock_bh(rwlock_t *lock)	__acquires(lock);
 void __lockfunc _raw_write_lock_bh(rwlock_t *lock)	__acquires(lock);
 void __lockfunc _raw_read_lock_irq(rwlock_t *lock)	__acquires(lock);
@@ -209,6 +210,13 @@ static inline void __raw_write_lock(rwlock_t *lock)
 	LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
 }
 
+static inline void __raw_write_lock_nested(rwlock_t *lock, int subclass)
+{
+	preempt_disable();
+	rwlock_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+	LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
+}
+
 #endif /* !CONFIG_GENERIC_LOCKBREAK || CONFIG_DEBUG_LOCK_ALLOC */
 
 static inline void __raw_write_unlock(rwlock_t *lock)
diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h
index 49c1f3842ed5..8544ff05e594 100644
--- a/include/linux/rwlock_rt.h
+++ b/include/linux/rwlock_rt.h
@@ -28,6 +28,7 @@ extern void rt_read_lock(rwlock_t *rwlock);
 extern int rt_read_trylock(rwlock_t *rwlock);
 extern void rt_read_unlock(rwlock_t *rwlock);
 extern void rt_write_lock(rwlock_t *rwlock);
+extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass);
 extern int rt_write_trylock(rwlock_t *rwlock);
 extern void rt_write_unlock(rwlock_t *rwlock);
 
@@ -83,6 +84,15 @@ static __always_inline void write_lock(rwlock_t *rwlock)
 	rt_write_lock(rwlock);
 }
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static __always_inline void write_lock_nested(rwlock_t *rwlock, int subclass)
+{
+	rt_write_lock_nested(rwlock, subclass);
+}
+#else
+#define write_lock_nested(lock, subclass)	rt_write_lock(((void)(subclass), (lock)))
+#endif
+
 static __always_inline void write_lock_bh(rwlock_t *rwlock)
 {
 	local_bh_disable();
diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
index d0d188861ad6..b8ba00ccccde 100644
--- a/include/linux/spinlock_api_up.h
+++ b/include/linux/spinlock_api_up.h
@@ -59,6 +59,7 @@
 #define _raw_spin_lock_nested(lock, subclass)	__LOCK(lock)
 #define _raw_read_lock(lock)			__LOCK(lock)
 #define _raw_write_lock(lock)			__LOCK(lock)
+#define _raw_write_lock_nested(lock, subclass)	__LOCK(lock)
 #define _raw_spin_lock_bh(lock)			__LOCK_BH(lock)
 #define _raw_read_lock_bh(lock)			__LOCK_BH(lock)
 #define _raw_write_lock_bh(lock)		__LOCK_BH(lock)
-- 
cgit v1.2.3


From 6dfbbae14a7b961f41d80a106e1ab60e86d061c5 Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Fri, 21 Jan 2022 22:14:20 -0800
Subject: fs: proc: store PDE()->data into inode->i_private

PDE_DATA(inode) is introduced to get user private data and hide the
layout of struct proc_dir_entry.  The inode->i_private is used to do the
same thing as well.  Save a copy of user private data to inode->
i_private when proc inode is allocated.  This means the user also can
get their private data by inode->i_private.

Introduce pde_data() to wrap inode->i_private so that we can remove
PDE_DATA() from fs/proc/generic.c and make PTE_DATE() as a wrapper of
pde_data().  It will be easier if we decide to remove PDE_DATE() in the
future.

Link: https://lkml.kernel.org/r/20211124081956.87711-1-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Alexey Gladkov <gladkov.alexey@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/proc_fs.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 01b9268451a8..b6e7005cc1b2 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -110,7 +110,18 @@ extern struct proc_dir_entry *proc_create_data(const char *, umode_t,
 struct proc_dir_entry *proc_create(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct proc_ops *proc_ops);
 extern void proc_set_size(struct proc_dir_entry *, loff_t);
 extern void proc_set_user(struct proc_dir_entry *, kuid_t, kgid_t);
-extern void *PDE_DATA(const struct inode *);
+
+/*
+ * Obtain the private data passed by user through proc_create_data() or
+ * related.
+ */
+static inline void *pde_data(const struct inode *inode)
+{
+	return inode->i_private;
+}
+
+#define PDE_DATA(i)	pde_data(i)
+
 extern void *proc_get_parent_data(const struct inode *);
 extern void proc_remove(struct proc_dir_entry *);
 extern void remove_proc_entry(const char *, struct proc_dir_entry *);
-- 
cgit v1.2.3


From 359745d78351c6f5442435f81549f0207ece28aa Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Fri, 21 Jan 2022 22:14:23 -0800
Subject: proc: remove PDE_DATA() completely

Remove PDE_DATA() completely and replace it with pde_data().

[akpm@linux-foundation.org: fix naming clash in drivers/nubus/proc.c]
[akpm@linux-foundation.org: now fix it properly]

Link: https://lkml.kernel.org/r/20211124081956.87711-2-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Alexey Gladkov <gladkov.alexey@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/proc_fs.h  | 4 +---
 include/linux/seq_file.h | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index b6e7005cc1b2..81d6e4ec2294 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -120,8 +120,6 @@ static inline void *pde_data(const struct inode *inode)
 	return inode->i_private;
 }
 
-#define PDE_DATA(i)	pde_data(i)
-
 extern void *proc_get_parent_data(const struct inode *);
 extern void proc_remove(struct proc_dir_entry *);
 extern void remove_proc_entry(const char *, struct proc_dir_entry *);
@@ -202,7 +200,7 @@ proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent,
 
 static inline void proc_set_size(struct proc_dir_entry *de, loff_t size) {}
 static inline void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid) {}
-static inline void *PDE_DATA(const struct inode *inode) {BUG(); return NULL;}
+static inline void *pde_data(const struct inode *inode) {BUG(); return NULL;}
 static inline void *proc_get_parent_data(const struct inode *inode) { BUG(); return NULL; }
 
 static inline void proc_remove(struct proc_dir_entry *de) {}
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 72dbb44a4573..88cc16444b43 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -209,7 +209,7 @@ static const struct file_operations __name ## _fops = {			\
 #define DEFINE_PROC_SHOW_ATTRIBUTE(__name)				\
 static int __name ## _open(struct inode *inode, struct file *file)	\
 {									\
-	return single_open(file, __name ## _show, PDE_DATA(inode));	\
+	return single_open(file, __name ## _show, pde_data(inode));	\
 }									\
 									\
 static const struct proc_ops __name ## _proc_ops = {			\
-- 
cgit v1.2.3


From 2dba5eb1c73b6ba2988ced07250edeac0f8cbf5a Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Fri, 21 Jan 2022 22:14:27 -0800
Subject: lib/stackdepot: allow optional init and stack_table allocation by
 kvmalloc()

Currently, enabling CONFIG_STACKDEPOT means its stack_table will be
allocated from memblock, even if stack depot ends up not actually used.
The default size of stack_table is 4MB on 32-bit, 8MB on 64-bit.

This is fine for use-cases such as KASAN which is also a config option
and has overhead on its own.  But it's an issue for functionality that
has to be actually enabled on boot (page_owner) or depends on hardware
(GPU drivers) and thus the memory might be wasted.  This was raised as
an issue [1] when attempting to add stackdepot support for SLUB's debug
object tracking functionality.  It's common to build kernels with
CONFIG_SLUB_DEBUG and enable slub_debug on boot only when needed, or
create only specific kmem caches with debugging for testing purposes.

It would thus be more efficient if stackdepot's table was allocated only
when actually going to be used.  This patch thus makes the allocation
(and whole stack_depot_init() call) optional:

 - Add a CONFIG_STACKDEPOT_ALWAYS_INIT flag to keep using the current
   well-defined point of allocation as part of mem_init(). Make
   CONFIG_KASAN select this flag.

 - Other users have to call stack_depot_init() as part of their own init
   when it's determined that stack depot will actually be used. This may
   depend on both config and runtime conditions. Convert current users
   which are page_owner and several in the DRM subsystem. Same will be
   done for SLUB later.

 - Because the init might now be called after the boot-time memblock
   allocation has given all memory to the buddy allocator, change
   stack_depot_init() to allocate stack_table with kvmalloc() when
   memblock is no longer available. Also handle allocation failure by
   disabling stackdepot (could have theoretically happened even with
   memblock allocation previously), and don't unnecessarily align the
   memblock allocation to its own size anymore.

[1] https://lore.kernel.org/all/CAMuHMdW=eoVzM1Re5FVoEN87nKfiLmM2+Ah7eNu2KXEhCvbZyA@mail.gmail.com/

Link: https://lkml.kernel.org/r/20211013073005.11351-1-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Marco Elver <elver@google.com> # stackdepot
Cc: Marco Elver <elver@google.com>
Cc: Vijayanand Jitta <vjitta@codeaurora.org>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Oliver Glitta <glittao@gmail.com>
Cc: Imran Khan <imran.f.khan@oracle.com>
From: Colin Ian King <colin.king@canonical.com>
Subject: lib/stackdepot: fix spelling mistake and grammar in pr_err message

There is a spelling mistake of the work allocation so fix this and
re-phrase the message to make it easier to read.

Link: https://lkml.kernel.org/r/20211015104159.11282-1-colin.king@canonical.com
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
From: Vlastimil Babka <vbabka@suse.cz>
Subject: lib/stackdepot: allow optional init and stack_table allocation by kvmalloc() - fixup

On FLATMEM, we call page_ext_init_flatmem_late() just before
kmem_cache_init() which means stack_depot_init() (called by page owner
init) will not recognize properly it should use kvmalloc() and not
memblock_alloc().  memblock_alloc() will also not issue a warning and
return a block memory that can be invalid and cause kernel page fault when
saving stacks, as reported by the kernel test robot [1].

Fix this by moving page_ext_init_flatmem_late() below kmem_cache_init() so
that slab_is_available() is true during stack_depot_init().  SPARSEMEM
doesn't have this issue, as it doesn't do page_ext_init_flatmem_late(),
but a different page_ext_init() even later in the boot process.

Thanks to Mike Rapoport for pointing out the FLATMEM init ordering issue.

While at it, also actually resolve a checkpatch warning in stack_depot_init()
from DRM CI, which was supposed to be in the original patch already.

[1] https://lore.kernel.org/all/20211014085450.GC18719@xsang-OptiPlex-9020/

Link: https://lkml.kernel.org/r/6abd9213-19a9-6d58-cedc-2414386d2d81@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: kernel test robot <oliver.sang@intel.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
From: Vlastimil Babka <vbabka@suse.cz>
Subject: lib/stackdepot: allow optional init and stack_table allocation by kvmalloc() - fixup3

Due to cd06ab2fd48f ("drm/locking: add backtrace for locking contended
locks without backoff") landing recently to -next adding a new stack depot
user in drivers/gpu/drm/drm_modeset_lock.c we need to add an appropriate
call to stack_depot_init() there as well.

Link: https://lkml.kernel.org/r/2a692365-cfa1-64f2-34e0-8aa5674dce5e@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Naresh Kamboju <naresh.kamboju@linaro.org>
Cc: Marco Elver <elver@google.com>
Cc: Vijayanand Jitta <vjitta@codeaurora.org>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Oliver Glitta <glittao@gmail.com>
Cc: Imran Khan <imran.f.khan@oracle.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
From: Vlastimil Babka <vbabka@suse.cz>
Subject: lib/stackdepot: allow optional init and stack_table allocation by kvmalloc() - fixup4

Due to 4e66934eaadc ("lib: add reference counting tracking
infrastructure") landing recently to net-next adding a new stack depot
user in lib/ref_tracker.c we need to add an appropriate call to
stack_depot_init() there as well.

Link: https://lkml.kernel.org/r/45c1b738-1a2f-5b5f-2f6d-86fab206d01c@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Cc: Jiri Slab <jirislaby@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ref_tracker.h |  2 ++
 include/linux/stackdepot.h  | 25 ++++++++++++++++---------
 2 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h
index c11c9db5825c..60f3453be23e 100644
--- a/include/linux/ref_tracker.h
+++ b/include/linux/ref_tracker.h
@@ -4,6 +4,7 @@
 #include <linux/refcount.h>
 #include <linux/types.h>
 #include <linux/spinlock.h>
+#include <linux/stackdepot.h>
 
 struct ref_tracker;
 
@@ -26,6 +27,7 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
 	spin_lock_init(&dir->lock);
 	dir->quarantine_avail = quarantine_count;
 	refcount_set(&dir->untracked, 1);
+	stack_depot_init();
 }
 
 void ref_tracker_dir_exit(struct ref_tracker_dir *dir);
diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index c34b55a6e554..17f992fe6355 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -19,6 +19,22 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
 					unsigned int nr_entries,
 					gfp_t gfp_flags, bool can_alloc);
 
+/*
+ * Every user of stack depot has to call this during its own init when it's
+ * decided that it will be calling stack_depot_save() later.
+ *
+ * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot
+ * enabled as part of mm_init(), for subsystems where it's known at compile time
+ * that stack depot will be used.
+ */
+int stack_depot_init(void);
+
+#ifdef CONFIG_STACKDEPOT_ALWAYS_INIT
+static inline int stack_depot_early_init(void)	{ return stack_depot_init(); }
+#else
+static inline int stack_depot_early_init(void)	{ return 0; }
+#endif
+
 depot_stack_handle_t stack_depot_save(unsigned long *entries,
 				      unsigned int nr_entries, gfp_t gfp_flags);
 
@@ -30,13 +46,4 @@ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
 
 void stack_depot_print(depot_stack_handle_t stack);
 
-#ifdef CONFIG_STACKDEPOT
-int stack_depot_init(void);
-#else
-static inline int stack_depot_init(void)
-{
-	return 0;
-}
-#endif	/* CONFIG_STACKDEPOT */
-
 #endif
-- 
cgit v1.2.3


From 0a4ee518185e902758191d968600399f3bc2be31 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 Jan 2022 22:14:34 -0800
Subject: mm: remove cleancache

Patch series "remove Xen tmem leftovers".

Since the removal of the Xen tmem driver in 2019, the cleancache hooks
are entirely unused, as are large parts of frontswap.  This series
against linux-next (with the folio changes included) removes
cleancaches, and cuts down frontswap to the bits actually used by zswap.

This patch (of 13):

The cleancache subsystem is unused since the removal of Xen tmem driver
in commit 814bbf49dcd0 ("xen: remove tmem driver").

[akpm@linux-foundation.org: remove now-unreachable code]

Link: https://lkml.kernel.org/r/20211224062246.1258487-1-hch@lst.de
Link: https://lkml.kernel.org/r/20211224062246.1258487-2-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Konrad Rzeszutek Wilk <Konrad.wilk@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cleancache.h | 124 ---------------------------------------------
 include/linux/fs.h         |   5 --
 2 files changed, 129 deletions(-)
 delete mode 100644 include/linux/cleancache.h

(limited to 'include')

diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h
deleted file mode 100644
index 5f5730c1d324..000000000000
--- a/include/linux/cleancache.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_CLEANCACHE_H
-#define _LINUX_CLEANCACHE_H
-
-#include <linux/fs.h>
-#include <linux/exportfs.h>
-#include <linux/mm.h>
-
-#define CLEANCACHE_NO_POOL		-1
-#define CLEANCACHE_NO_BACKEND		-2
-#define CLEANCACHE_NO_BACKEND_SHARED	-3
-
-#define CLEANCACHE_KEY_MAX 6
-
-/*
- * cleancache requires every file with a page in cleancache to have a
- * unique key unless/until the file is removed/truncated.  For some
- * filesystems, the inode number is unique, but for "modern" filesystems
- * an exportable filehandle is required (see exportfs.h)
- */
-struct cleancache_filekey {
-	union {
-		ino_t ino;
-		__u32 fh[CLEANCACHE_KEY_MAX];
-		u32 key[CLEANCACHE_KEY_MAX];
-	} u;
-};
-
-struct cleancache_ops {
-	int (*init_fs)(size_t);
-	int (*init_shared_fs)(uuid_t *uuid, size_t);
-	int (*get_page)(int, struct cleancache_filekey,
-			pgoff_t, struct page *);
-	void (*put_page)(int, struct cleancache_filekey,
-			pgoff_t, struct page *);
-	void (*invalidate_page)(int, struct cleancache_filekey, pgoff_t);
-	void (*invalidate_inode)(int, struct cleancache_filekey);
-	void (*invalidate_fs)(int);
-};
-
-extern int cleancache_register_ops(const struct cleancache_ops *ops);
-extern void __cleancache_init_fs(struct super_block *);
-extern void __cleancache_init_shared_fs(struct super_block *);
-extern int  __cleancache_get_page(struct page *);
-extern void __cleancache_put_page(struct page *);
-extern void __cleancache_invalidate_page(struct address_space *, struct page *);
-extern void __cleancache_invalidate_inode(struct address_space *);
-extern void __cleancache_invalidate_fs(struct super_block *);
-
-#ifdef CONFIG_CLEANCACHE
-#define cleancache_enabled (1)
-static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping)
-{
-	return mapping->host->i_sb->cleancache_poolid >= 0;
-}
-static inline bool cleancache_fs_enabled(struct page *page)
-{
-	return cleancache_fs_enabled_mapping(page->mapping);
-}
-#else
-#define cleancache_enabled (0)
-#define cleancache_fs_enabled(_page) (0)
-#define cleancache_fs_enabled_mapping(_page) (0)
-#endif
-
-/*
- * The shim layer provided by these inline functions allows the compiler
- * to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE
- * is disabled, to a single global variable check if CONFIG_CLEANCACHE
- * is enabled but no cleancache "backend" has dynamically enabled it,
- * and, for the most frequent cleancache ops, to a single global variable
- * check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled
- * and a cleancache backend has dynamically enabled cleancache, but the
- * filesystem referenced by that cleancache op has not enabled cleancache.
- * As a result, CONFIG_CLEANCACHE can be enabled by default with essentially
- * no measurable performance impact.
- */
-
-static inline void cleancache_init_fs(struct super_block *sb)
-{
-	if (cleancache_enabled)
-		__cleancache_init_fs(sb);
-}
-
-static inline void cleancache_init_shared_fs(struct super_block *sb)
-{
-	if (cleancache_enabled)
-		__cleancache_init_shared_fs(sb);
-}
-
-static inline int cleancache_get_page(struct page *page)
-{
-	if (cleancache_enabled && cleancache_fs_enabled(page))
-		return __cleancache_get_page(page);
-	return -1;
-}
-
-static inline void cleancache_put_page(struct page *page)
-{
-	if (cleancache_enabled && cleancache_fs_enabled(page))
-		__cleancache_put_page(page);
-}
-
-static inline void cleancache_invalidate_page(struct address_space *mapping,
-					struct page *page)
-{
-	/* careful... page->mapping is NULL sometimes when this is called */
-	if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping))
-		__cleancache_invalidate_page(mapping, page);
-}
-
-static inline void cleancache_invalidate_inode(struct address_space *mapping)
-{
-	if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping))
-		__cleancache_invalidate_inode(mapping);
-}
-
-static inline void cleancache_invalidate_fs(struct super_block *sb)
-{
-	if (cleancache_enabled)
-		__cleancache_invalidate_fs(sb);
-}
-
-#endif /* _LINUX_CLEANCACHE_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9617dea24978..f3daaea16554 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1535,11 +1535,6 @@ struct super_block {
 
 	const struct dentry_operations *s_d_op; /* default d_op for dentries */
 
-	/*
-	 * Saved pool identifier for cleancache (-1 means none)
-	 */
-	int cleancache_poolid;
-
 	struct shrinker s_shrink;	/* per-sb shrinker handle */
 
 	/* Number of inodes with nlink == 0 but still referenced */
-- 
cgit v1.2.3


From 3d6035f136009f9cae380022754cba31f32570c5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 Jan 2022 22:14:38 -0800
Subject: frontswap: remove frontswap_writethrough

frontswap_writethrough is never called, so remove it.

Link: https://lkml.kernel.org/r/20211224062246.1258487-3-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Konrad Rzeszutek Wilk <Konrad.wilk@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/frontswap.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
index b07d88c92bb2..4a03fda41572 100644
--- a/include/linux/frontswap.h
+++ b/include/linux/frontswap.h
@@ -26,7 +26,6 @@ struct frontswap_ops {
 extern void frontswap_register_ops(struct frontswap_ops *ops);
 extern void frontswap_shrink(unsigned long);
 extern unsigned long frontswap_curr_pages(void);
-extern void frontswap_writethrough(bool);
 #define FRONTSWAP_HAS_EXCLUSIVE_GETS
 extern void frontswap_tmem_exclusive_gets(bool);
 
-- 
cgit v1.2.3


From 71024cb4a0bfe7767aec7a128d0a1a13a37b7fcd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 Jan 2022 22:14:41 -0800
Subject: frontswap: remove frontswap_tmem_exclusive_gets

frontswap_tmem_exclusive_gets is never called, so remove it.

Link: https://lkml.kernel.org/r/20211224062246.1258487-4-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Konrad Rzeszutek Wilk <Konrad.wilk@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/frontswap.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
index 4a03fda41572..83a56392cc7f 100644
--- a/include/linux/frontswap.h
+++ b/include/linux/frontswap.h
@@ -26,8 +26,6 @@ struct frontswap_ops {
 extern void frontswap_register_ops(struct frontswap_ops *ops);
 extern void frontswap_shrink(unsigned long);
 extern unsigned long frontswap_curr_pages(void);
-#define FRONTSWAP_HAS_EXCLUSIVE_GETS
-extern void frontswap_tmem_exclusive_gets(bool);
 
 extern bool __frontswap_test(struct swap_info_struct *, pgoff_t);
 extern void __frontswap_init(unsigned type, unsigned long *map);
-- 
cgit v1.2.3


From 0b364446d734da76e421dbfb09e5268270cefaf0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 Jan 2022 22:14:44 -0800
Subject: frontswap: remove frontswap_shrink

frontswap_shrink is never called, so remove it.

Link: https://lkml.kernel.org/r/20211224062246.1258487-5-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Konrad Rzeszutek Wilk <Konrad.wilk@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/frontswap.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
index 83a56392cc7f..d268d7bb6513 100644
--- a/include/linux/frontswap.h
+++ b/include/linux/frontswap.h
@@ -24,7 +24,6 @@ struct frontswap_ops {
 };
 
 extern void frontswap_register_ops(struct frontswap_ops *ops);
-extern void frontswap_shrink(unsigned long);
 extern unsigned long frontswap_curr_pages(void);
 
 extern bool __frontswap_test(struct swap_info_struct *, pgoff_t);
-- 
cgit v1.2.3


From 3e8e1af63d7a831f576477c25d9b89049bd2d53d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 Jan 2022 22:14:47 -0800
Subject: frontswap: remove frontswap_curr_pages

frontswap_curr_pages is never called, so remove it.

Link: https://lkml.kernel.org/r/20211224062246.1258487-6-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Konrad Rzeszutek Wilk <Konrad.wilk@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/frontswap.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
index d268d7bb6513..5205c2977b20 100644
--- a/include/linux/frontswap.h
+++ b/include/linux/frontswap.h
@@ -24,7 +24,6 @@ struct frontswap_ops {
 };
 
 extern void frontswap_register_ops(struct frontswap_ops *ops);
-extern unsigned long frontswap_curr_pages(void);
 
 extern bool __frontswap_test(struct swap_info_struct *, pgoff_t);
 extern void __frontswap_init(unsigned type, unsigned long *map);
-- 
cgit v1.2.3


From 1cf53c894d15dd4b73397a56fa055d76d3db66b4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 Jan 2022 22:14:51 -0800
Subject: frontswap: simplify frontswap_init

Just use IS_ENABLED() and remove the __frontswap_init indirection.

Also remove the unused export.

Link: https://lkml.kernel.org/r/20211224062246.1258487-7-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Konrad Rzeszutek Wilk <Konrad.wilk@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/frontswap.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
index 5205c2977b20..73d7beb44f2b 100644
--- a/include/linux/frontswap.h
+++ b/include/linux/frontswap.h
@@ -26,7 +26,7 @@ struct frontswap_ops {
 extern void frontswap_register_ops(struct frontswap_ops *ops);
 
 extern bool __frontswap_test(struct swap_info_struct *, pgoff_t);
-extern void __frontswap_init(unsigned type, unsigned long *map);
+extern void frontswap_init(unsigned type, unsigned long *map);
 extern int __frontswap_store(struct page *page);
 extern int __frontswap_load(struct page *page);
 extern void __frontswap_invalidate_page(unsigned, pgoff_t);
@@ -107,11 +107,4 @@ static inline void frontswap_invalidate_area(unsigned type)
 		__frontswap_invalidate_area(type);
 }
 
-static inline void frontswap_init(unsigned type, unsigned long *map)
-{
-#ifdef CONFIG_FRONTSWAP
-	__frontswap_init(type, map);
-#endif
-}
-
 #endif /* _LINUX_FRONTSWAP_H */
-- 
cgit v1.2.3


From 10a9c496789fe2098bfc018650fc77b23ba08a54 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 Jan 2022 22:14:57 -0800
Subject: mm: simplify try_to_unuse

Remove the unused frontswap and pages_to_unuse arguments, and mark the
function static now that the caller in frontswap is gone.

[akpm@linux-foundation.org: fix shmem_unuse() stub, per Matthew]

Link: https://lkml.kernel.org/r/20211224062246.1258487-9-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Konrad Rzeszutek Wilk <Konrad.wilk@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Cc: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/frontswap.h | 7 -------
 include/linux/shmem_fs.h  | 3 +--
 include/linux/swapfile.h  | 1 -
 3 files changed, 1 insertion(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
index 73d7beb44f2b..a9817d4fa74c 100644
--- a/include/linux/frontswap.h
+++ b/include/linux/frontswap.h
@@ -7,13 +7,6 @@
 #include <linux/bitops.h>
 #include <linux/jump_label.h>
 
-/*
- * Return code to denote that requested number of
- * frontswap pages are unused(moved to page cache).
- * Used in shmem_unuse and try_to_unuse.
- */
-#define FRONTSWAP_PAGES_UNUSED	2
-
 struct frontswap_ops {
 	void (*init)(unsigned); /* this swap type was just swapon'ed */
 	int (*store)(unsigned, pgoff_t, struct page *); /* store a page */
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 166158b6e917..e65b80ed09e7 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -83,8 +83,7 @@ extern void shmem_unlock_mapping(struct address_space *mapping);
 extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
 					pgoff_t index, gfp_t gfp_mask);
 extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
-extern int shmem_unuse(unsigned int type, bool frontswap,
-		       unsigned long *fs_pages_to_unuse);
+int shmem_unuse(unsigned int type);
 
 extern bool shmem_is_huge(struct vm_area_struct *vma,
 			  struct inode *inode, pgoff_t index);
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index e06febf62978..809cd01ef2c5 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -9,7 +9,6 @@
 extern spinlock_t swap_lock;
 extern struct plist_head swap_active_head;
 extern struct swap_info_struct *swap_info[];
-extern int try_to_unuse(unsigned int, bool, unsigned long);
 extern unsigned long generic_max_swapfile_size(void);
 extern unsigned long max_swapfile_size(void);
 
-- 
cgit v1.2.3


From bd9cd521496ba8d537d8f46f4167bf4221aba9a3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 Jan 2022 22:15:01 -0800
Subject: frontswap: remove frontswap_test

frontswap_test is unused now, remove it.

Link: https://lkml.kernel.org/r/20211224062246.1258487-10-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Konrad Rzeszutek Wilk <Konrad.wilk@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/frontswap.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
index a9817d4fa74c..c5b2848d2240 100644
--- a/include/linux/frontswap.h
+++ b/include/linux/frontswap.h
@@ -18,7 +18,6 @@ struct frontswap_ops {
 
 extern void frontswap_register_ops(struct frontswap_ops *ops);
 
-extern bool __frontswap_test(struct swap_info_struct *, pgoff_t);
 extern void frontswap_init(unsigned type, unsigned long *map);
 extern int __frontswap_store(struct page *page);
 extern int __frontswap_load(struct page *page);
@@ -33,11 +32,6 @@ static inline bool frontswap_enabled(void)
 	return static_branch_unlikely(&frontswap_enabled_key);
 }
 
-static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
-{
-	return __frontswap_test(sis, offset);
-}
-
 static inline void frontswap_map_set(struct swap_info_struct *p,
 				     unsigned long *map)
 {
@@ -56,11 +50,6 @@ static inline bool frontswap_enabled(void)
 	return false;
 }
 
-static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
-{
-	return false;
-}
-
 static inline void frontswap_map_set(struct swap_info_struct *p,
 				     unsigned long *map)
 {
-- 
cgit v1.2.3


From 633423a09cb5cfe61438283e1ce49c23cf4a0611 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 Jan 2022 22:15:07 -0800
Subject: mm: mark swap_lock and swap_active_head static

swap_lock and swap_active_head are only used in swapfile.c, so mark them
static.

Link: https://lkml.kernel.org/r/20211224062246.1258487-12-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Konrad Rzeszutek Wilk <Konrad.wilk@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swapfile.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index 809cd01ef2c5..54078542134c 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -6,8 +6,6 @@
  * these were static in swapfile.c but frontswap.c needs them and we don't
  * want to expose them to the dozens of source files that include swap.h
  */
-extern spinlock_t swap_lock;
-extern struct plist_head swap_active_head;
 extern struct swap_info_struct *swap_info[];
 extern unsigned long generic_max_swapfile_size(void);
 extern unsigned long max_swapfile_size(void);
-- 
cgit v1.2.3


From 1da0d94a3ec8c5f3793b7be8538b55e60ebeefe3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 Jan 2022 22:15:10 -0800
Subject: frontswap: remove support for multiple ops

There is only a single instance of frontswap ops in the kernel, so
simplify the frontswap code by removing support for multiple operations.

Link: https://lkml.kernel.org/r/20211224062246.1258487-13-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Konrad Rzeszutek Wilk <Konrad.wilk@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/frontswap.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
index c5b2848d2240..a631bac12220 100644
--- a/include/linux/frontswap.h
+++ b/include/linux/frontswap.h
@@ -13,10 +13,9 @@ struct frontswap_ops {
 	int (*load)(unsigned, pgoff_t, struct page *); /* load a page */
 	void (*invalidate_page)(unsigned, pgoff_t); /* page no longer needed */
 	void (*invalidate_area)(unsigned); /* swap type just swapoff'ed */
-	struct frontswap_ops *next; /* private pointer to next ops */
 };
 
-extern void frontswap_register_ops(struct frontswap_ops *ops);
+int frontswap_register_ops(const struct frontswap_ops *ops);
 
 extern void frontswap_init(unsigned type, unsigned long *map);
 extern int __frontswap_store(struct page *page);
-- 
cgit v1.2.3


From 25d490eb46486e88c16e64d9eb7cfd33a642d596 Mon Sep 17 00:00:00 2001
From: Wang Kefeng <wangkefeng.wang@huawei.com>
Date: Sat, 18 Dec 2021 09:30:40 +0100
Subject: ARM: 9172/1: amba: Cleanup amba pclk operation

There is no user about amba_pclk_[un]prepare() besides pl330.c,
directly use clk_[un]prepare(). After this, all the function about
amba pclk operation, enable, disable, [un]prepare could be killed.

Acked-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
 include/linux/amba/bus.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index 6c7f47846971..09174970b855 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -121,26 +121,6 @@ struct amba_device *amba_find_device(const char *, struct device *, unsigned int
 int amba_request_regions(struct amba_device *, const char *);
 void amba_release_regions(struct amba_device *);
 
-static inline int amba_pclk_enable(struct amba_device *dev)
-{
-	return clk_enable(dev->pclk);
-}
-
-static inline void amba_pclk_disable(struct amba_device *dev)
-{
-	clk_disable(dev->pclk);
-}
-
-static inline int amba_pclk_prepare(struct amba_device *dev)
-{
-	return clk_prepare(dev->pclk);
-}
-
-static inline void amba_pclk_unprepare(struct amba_device *dev)
-{
-	clk_unprepare(dev->pclk);
-}
-
 /* Some drivers don't use the struct amba_device */
 #define AMBA_CONFIG_BITS(a) (((a) >> 24) & 0xff)
 #define AMBA_REV_BITS(a) (((a) >> 20) & 0x0f)
-- 
cgit v1.2.3


From dacf3ca134d0dc105caee77651a349a86bd77456 Mon Sep 17 00:00:00 2001
From: Wang Kefeng <wangkefeng.wang@huawei.com>
Date: Sat, 18 Dec 2021 09:30:41 +0100
Subject: ARM: 9173/1: amba: kill amba_find_match()

There is no one use amba_find_match(), kill it.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
 include/linux/amba/bus.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index 09174970b855..6562f543c3e0 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -117,7 +117,6 @@ void amba_device_put(struct amba_device *);
 int amba_device_add(struct amba_device *, struct resource *);
 int amba_device_register(struct amba_device *, struct resource *);
 void amba_device_unregister(struct amba_device *);
-struct amba_device *amba_find_device(const char *, struct device *, unsigned int, unsigned int);
 int amba_request_regions(struct amba_device *, const char *);
 void amba_release_regions(struct amba_device *);
 
-- 
cgit v1.2.3